From 66130f734283e0d696b06df2c7ad54e92a4ac1fc Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 30 Apr 2021 17:34:21 -0700
Subject: [PATCH 01/32] Add gf test interface to allow fine grained control
 over the GF testing

---
 tests/gauge_alg_test.cpp            | 64 ++++++++++++------------
 tests/heatbath_test.cpp             |  3 +-
 tests/utils/command_line_params.cpp | 75 ++++++++++++++++++++++-------
 tests/utils/command_line_params.h   | 12 +++++
 4 files changed, 102 insertions(+), 52 deletions(-)

diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index ebfcaaa0b8..410d2304bd 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -21,8 +21,25 @@
 
 using namespace quda;
 
-class GaugeAlgTest : public ::testing::Test {
+class GaugeAlgTest : public ::testing::Test
+{  
  protected:
+
+  QudaGaugeParam param;
+  
+  Timer a0,a1;
+  double2 detu;
+  double3 plaq;
+  cudaGaugeField *U;
+  int nsteps;
+  int nhbsteps;
+  int novrsteps;
+  bool coldstart;
+  double beta_value;
+
+  RNG * randstates;
+
+  
   void SetReunitarizationConsts(){
     const double unitarize_eps = 1e-14;
     const double max_error = 1e-10;
@@ -118,11 +135,11 @@ class GaugeAlgTest : public ::testing::Test {
     randstates = new RNG(gParam, 1234);
     randstates->Init();
 
-    nsteps = 10;
-    nhbsteps = 4;
-    novrsteps = 4;
-    coldstart = false;
-    beta_value = 6.2;
+    nsteps = heatbath_num_steps;
+    nhbsteps = heatbath_num_heatbath_per_step;
+    novrsteps = heatbath_num_overrelax_per_step;
+    coldstart = heatbath_coldstart;
+    beta_value = heatbath_beta_value;
 
     a0.Start(__func__, __FILE__, __LINE__);
     a1.Start(__func__, __FILE__, __LINE__);
@@ -175,38 +192,18 @@ class GaugeAlgTest : public ::testing::Test {
     randstates->Release();
     delete randstates;
   }
-
-  QudaGaugeParam param;
-
-  Timer a0,a1;
-  double2 detu;
-  double3 plaq;
-  cudaGaugeField *U;
-  int nsteps;
-  int nhbsteps;
-  int novrsteps;
-  bool coldstart;
-  double beta_value;
-  RNG * randstates;
-
 };
 
 TEST_F(GaugeAlgTest, Generation)
 {
   detu = getLinkDeterminant(*U);
-  plaq = plaquette(*U);
-  bool testgen = false;
-  //check plaquette value for beta = 6.2
-  if (plaq.x < 0.614 && plaq.x > 0.611 && plaq.y < 0.614 && plaq.y > 0.611) testgen = true;
-
-  if (testgen) { ASSERT_TRUE(CheckDeterminant(detu)); }
+  ASSERT_TRUE(CheckDeterminant(detu));
 }
 
 TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
-  const int reunit_interval = 10;
   printfQuda("Landau gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 4, 100, 10, 1.5, 0, reunit_interval, 1);
+  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
   auto plaq_gf = plaquette(*U);
   printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
   ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -214,9 +211,8 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 
 TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
-  const int reunit_interval = 10;
   printfQuda("Coulomb gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 3, 100, 10, 1.5, 0, reunit_interval, 1);
+  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
   auto plaq_gf = plaquette(*U);
   printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
   ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -226,7 +222,7 @@ TEST_F(GaugeAlgTest, Landau_FFT)
 {
   if (!checkDimsPartitioned()) {
     printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, 100, 10, 0.08, 0, 0, 1);
+    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -237,7 +233,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
 {
   if (!checkDimsPartitioned()) {
     printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 3, 100, 10, 0.08, 0, 0, 1);
+    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -252,8 +248,10 @@ int main(int argc, char **argv)
   int test_rc = 0;
   xdim=ydim=zdim=tdim=32;
 
-  // command line options
+  // command line options  
   auto app = make_app();
+  add_gaugefix_option_group(app);
+  add_heatbath_option_group(app);
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp
index 98e69f613b..a0734f1516 100644
--- a/tests/heatbath_test.cpp
+++ b/tests/heatbath_test.cpp
@@ -53,9 +53,10 @@ void display_test_info()
 }
 
 int main(int argc, char **argv)
-{
+{  
   // command line options
   auto app = make_app();
+  add_heatbath_option_group(app);
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp
index 0a300b19af..b0e312b69d 100644
--- a/tests/utils/command_line_params.cpp
+++ b/tests/utils/command_line_params.cpp
@@ -221,6 +221,12 @@ quda::mgarray<QudaPrecision> mg_eig_save_prec = {};
 bool mg_eig_coarse_guess = false;
 bool mg_eig_preserve_deflation = false;
 
+int eofa_pm = 1;
+double eofa_shift = -1.2345;
+double eofa_mq1 = 1.0;
+double eofa_mq2 = 0.085;
+double eofa_mq3 = 1.0;
+
 double heatbath_beta_value = 6.2;
 int heatbath_warmup_steps = 10;
 int heatbath_num_steps = 10;
@@ -228,12 +234,6 @@ int heatbath_num_heatbath_per_step = 5;
 int heatbath_num_overrelax_per_step = 5;
 bool heatbath_coldstart = false;
 
-int eofa_pm = 1;
-double eofa_shift = -1.2345;
-double eofa_mq1 = 1.0;
-double eofa_mq2 = 0.085;
-double eofa_mq3 = 1.0;
-
 double stout_smear_rho = 0.1;
 double stout_smear_epsilon = -0.25;
 double ape_smear_rho = 0.6;
@@ -243,6 +243,16 @@ int wflow_steps = 100;
 QudaWFlowType wflow_type = QUDA_WFLOW_TYPE_WILSON;
 int measurement_interval = 5;
 
+int gf_gauge_dir = 4;
+int gf_maxiter = 10000;
+int gf_verbosity_interval = 100;
+double gf_ovr_relaxation_boost = 1.5;
+double gf_fft_alpha = 0.8;
+int gf_reunit_interval = 10;
+double gf_tolerance = 1e-6;
+bool gf_theta_condition = false;
+bool gf_fft_autotune = false;
+
 QudaContractType contract_type = QUDA_CONTRACT_TYPE_OPEN;
 
 std::array<int, 4> grid_partition = {1, 1, 1, 1};
@@ -495,18 +505,6 @@ std::shared_ptr<QUDAApp> make_app(std::string app_description, std::string app_n
   quda_app->add_option("--gaussian-sigma", gaussian_sigma,
                        "Width of the Gaussian noise used for random gauge field contruction (default 0.2)");
 
-  quda_app->add_option("--heatbath-beta", heatbath_beta_value, "Beta value used in heatbath test (default 6.2)");
-  quda_app->add_option("--heatbath-coldstart", heatbath_coldstart,
-                       "Whether to use a cold or hot start in heatbath test (default false)");
-  quda_app->add_option("--heatbath-num-hb-per-step", heatbath_num_heatbath_per_step,
-                       "Number of heatbath hits per heatbath step (default 5)");
-  quda_app->add_option("--heatbath-num-or-per-step", heatbath_num_overrelax_per_step,
-                       "Number of overrelaxation hits per heatbath step (default 5)");
-  quda_app->add_option("--heatbath-num-steps", heatbath_num_steps,
-                       "Number of measurement steps in heatbath test (default 10)");
-  quda_app->add_option("--heatbath-warmup-steps", heatbath_warmup_steps,
-                       "Number of warmup steps in heatbath test (default 10)");
-
   quda_app->add_option("--inv-type", inv_type, "The type of solver to use (default cg)")
     ->transform(CLI::QUDACheckedTransformer(inverter_type_map));
   quda_app->add_option("--inv-deflate", inv_deflate, "Deflate the inverter using the eigensolver");
@@ -1016,6 +1014,47 @@ void add_su3_option_group(std::shared_ptr<QUDAApp> quda_app)
                       "Measure the field energy and topological charge every Nth step (default 5) ");
 }
 
+void add_heatbath_option_group(std::shared_ptr<QUDAApp> quda_app)
+{
+  // Option group for heatbath related options
+  auto opgroup = quda_app->add_option_group("heatbath", "Options controlling heatbath tests");
+  opgroup->add_option("--heatbath-beta", heatbath_beta_value, "Beta value used in heatbath test (default 6.2)");
+  opgroup->add_option("--heatbath-coldstart", heatbath_coldstart,
+                       "Whether to use a cold or hot start in heatbath test (default false)");
+  opgroup->add_option("--heatbath-num-hb-per-step", heatbath_num_heatbath_per_step,
+                       "Number of heatbath hits per heatbath step (default 5)");
+  opgroup->add_option("--heatbath-num-or-per-step", heatbath_num_overrelax_per_step,
+                       "Number of overrelaxation hits per heatbath step (default 5)");
+  opgroup->add_option("--heatbath-num-steps", heatbath_num_steps,
+                       "Number of measurement steps in heatbath test (default 10)");
+  opgroup->add_option("--heatbath-warmup-steps", heatbath_warmup_steps,
+                       "Number of warmup steps in heatbath test (default 10)");
+}
+
+void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
+{
+  // Register the --gf-* options (each bound to its gf_* global) in a dedicated "gaugefix" option group
+  auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests");
+  opgroup->add_option("--gf-dir", gf_gauge_dir, "The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4)");
+  opgroup->add_option("--gf-maxiter", gf_maxiter,
+                       "The maximum number of gauge fixing iterations to be applied (default 10000)");
+  opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval,
+                       "Print the gauge fixing progress every N steps (default 100)");
+  opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost,
+                       "The overrelaxation boost parameter for the overrelaxation method (default 1.5)");
+  opgroup->add_option("--gf-fft-alpha", gf_fft_alpha,
+                       "The Alpha parameter in the FFT method (default 0.8)");
+  opgroup->add_option("--gf-reunit-interval", gf_reunit_interval,
+                       "Reunitarise the gauge field every N steps (default 10)");
+  opgroup->add_option("--gf-tol", gf_tolerance,
+                       "The tolerance of the gauge fixing quality (default 1e-6)");
+  opgroup->add_option("--gf-theta-condition", gf_theta_condition,
+                       "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
+  opgroup->add_option("--gf-fft-autotune", gf_fft_autotune,
+                       "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)");
+}
+
+
 void add_comms_option_group(std::shared_ptr<QUDAApp> quda_app)
 {
   auto opgroup
diff --git a/tests/utils/command_line_params.h b/tests/utils/command_line_params.h
index ce0bcaf718..12f16c2046 100644
--- a/tests/utils/command_line_params.h
+++ b/tests/utils/command_line_params.h
@@ -133,6 +133,8 @@ void add_deflation_option_group(std::shared_ptr<QUDAApp> quda_app);
 void add_multigrid_option_group(std::shared_ptr<QUDAApp> quda_app);
 void add_eofa_option_group(std::shared_ptr<QUDAApp> quda_app);
 void add_su3_option_group(std::shared_ptr<QUDAApp> quda_app);
+void add_heatbath_option_group(std::shared_ptr<QUDAApp> quda_app);
+void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app);
 void add_comms_option_group(std::shared_ptr<QUDAApp> quda_app);
 
 template <typename T> std::string inline get_string(CLI::TransformPairs<T> &map, T val)
@@ -369,6 +371,16 @@ extern int wflow_steps;
 extern QudaWFlowType wflow_type;
 extern int measurement_interval;
 
+extern int gf_gauge_dir;
+extern int gf_maxiter;
+extern int gf_verbosity_interval;
+extern double gf_ovr_relaxation_boost;
+extern double gf_fft_alpha;
+extern int gf_reunit_interval;
+extern double gf_tolerance;
+extern bool gf_theta_condition;
+extern bool gf_fft_autotune;
+
 extern QudaContractType contract_type;
 
 extern std::array<int, 4> grid_partition;

From 0071401c16af2770a7aae0b691f51fec4af0c504 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 30 Apr 2021 18:48:50 -0700
Subject: [PATCH 02/32] Move the gauge alg test to a ctest, make a new interface
 to the gauge fixing that allows for fine grained control and gauge IO

---
 lib/interface_quda.cpp    |  87 ++++----
 tests/CMakeLists.txt      |   4 +
 tests/gauge_alg_ctest.cpp | 274 ++++++++++++++++++++++++
 tests/gauge_alg_test.cpp  | 428 +++++++++++++++++++-------------------
 tests/su3_test.cpp        |  35 ++--
 5 files changed, 543 insertions(+), 285 deletions(-)
 create mode 100644 tests/gauge_alg_ctest.cpp

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 3af70859d8..6638a32046 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -236,8 +236,8 @@ static TimeProfile profileMomAction("momActionQuda");
 static TimeProfile profileEnd("endQuda");
 
 //!< Profiler for GaugeFixing
-static TimeProfile GaugeFixFFTQuda("GaugeFixFFTQuda");
-static TimeProfile GaugeFixOVRQuda("GaugeFixOVRQuda");
+static TimeProfile profileGaugeFixFFT("gaugeFixFFTQuda");
+static TimeProfile profileGaugeFixOVR("gaugeFixOVRQuda");
 
 //!< Profiler for toal time spend between init and end
 static TimeProfile profileInit2End("initQuda-endQuda",false);
@@ -1535,6 +1535,8 @@ void endQuda(void)
     profileProject.Print();
     profilePhase.Print();
     profileMomAction.Print();
+    profileGaugeFixOVR.Print();
+    profileGaugeFixFFT.Print();
     profileEnd.Print();
 
     profileInit2End.Print();
@@ -5809,11 +5811,11 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
                               const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param,
                               double *timeinfo)
 {
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_TOTAL);
-
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL);
+  
   checkGaugeParam(param);
 
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_INIT);
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT);
   GaugeFieldParam gParam(gauge, *param);
   auto *cpuGauge = new cpuGaugeField(gParam);
 
@@ -5824,44 +5826,37 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
   gParam.setPrecision(gParam.Precision(), true);
   auto *cudaInGauge = new cudaGaugeField(gParam);
 
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_INIT);
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_H2D);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_INIT);
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_H2D);
 
-  ///if (!param->use_resident_gauge) {   // load fields onto the device
   cudaInGauge->loadCPUField(*cpuGauge);
- /* } else { // or use resident fields already present
-    if (!gaugePrecise) errorQuda("No resident gauge field allocated");
-    cudaInGauge = gaugePrecise;
-    gaugePrecise = nullptr;
-  } */
 
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_H2D);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_H2D);
 
   if (comm_size() == 1) {
     // perform the update
-    GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+    profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
     gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
                    stopWtheta);
-    GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+    profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE);
   } else {
-    cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, GaugeFixOVRQuda);
+    cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFixOVR);
 
-    // perform the update
-    GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+    // Perform the update
+    profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
     gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
                    stopWtheta);
-    GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+    profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE);
 
-    //HOW TO COPY BACK TO CPU: cudaInGaugeEx->cpuGauge
     copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
   }
-
-  // copy the gauge field back to the host
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_D2H);
+  
+  // Copy the gauge field back to the host
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_D2H);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_D2H);
 
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_TOTAL);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_TOTAL);
 
   if (param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
@@ -5871,9 +5866,9 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
   }
 
   if(timeinfo){
-    timeinfo[0] = GaugeFixOVRQuda.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = GaugeFixOVRQuda.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = GaugeFixOVRQuda.Last(QUDA_PROFILE_D2H);
+    timeinfo[0] = profileGaugeFixOVR.Last(QUDA_PROFILE_H2D);
+    timeinfo[1] = profileGaugeFixOVR.Last(QUDA_PROFILE_COMPUTE);
+    timeinfo[2] = profileGaugeFixOVR.Last(QUDA_PROFILE_D2H);
   }
 
   return 0;
@@ -5883,11 +5878,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \
   const unsigned int  stopWtheta, QudaGaugeParam* param , double* timeinfo)
 {
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_TOTAL);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_TOTAL);
 
   checkGaugeParam(param);
 
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_INIT);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_INIT);
 
   GaugeFieldParam gParam(gauge, *param);
   auto *cpuGauge = new cpuGaugeField(gParam);
@@ -5900,33 +5895,27 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   auto *cudaInGauge = new cudaGaugeField(gParam);
 
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_INIT);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT);
 
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_H2D);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D);
 
-  //if (!param->use_resident_gauge) {   // load fields onto the device
   cudaInGauge->loadCPUField(*cpuGauge);
-  /*} else { // or use resident fields already present
-    if (!gaugePrecise) errorQuda("No resident gauge field allocated");
-    cudaInGauge = gaugePrecise;
-    gaugePrecise = nullptr;
-  } */
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_H2D);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_H2D);
 
   // perform the update
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
 
   gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE);
 
   // copy the gauge field back to the host
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_D2H);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_D2H);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_D2H);
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_TOTAL);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_TOTAL);
 
   if (param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
@@ -5934,11 +5923,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   } else {
     delete cudaInGauge;
   }
-
+  
   if (timeinfo) {
-    timeinfo[0] = GaugeFixFFTQuda.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = GaugeFixFFTQuda.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = GaugeFixFFTQuda.Last(QUDA_PROFILE_D2H);
+    timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D);
+    timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE);
+    timeinfo[2] = profileGaugeFixFFT.Last(QUDA_PROFILE_D2H);
   }
 
   return 0;
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 7c357e67ac..ed9e9df62f 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -239,6 +239,10 @@ if(QUDA_GAUGE_ALG)
   add_executable(gauge_alg_test gauge_alg_test.cpp)
   target_link_libraries(gauge_alg_test ${TEST_LIBS})
   quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS)
+
+  add_executable(gauge_alg_ctest gauge_alg_ctest.cpp)
+  target_link_libraries(gauge_alg_ctest ${TEST_LIBS})
+  quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS)
   install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR})
 
   add_executable(heatbath_test heatbath_test.cpp)
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
new file mode 100644
index 0000000000..410d2304bd
--- /dev/null
+++ b/tests/gauge_alg_ctest.cpp
@@ -0,0 +1,274 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <quda.h>
+#include <quda_internal.h>
+#include <gauge_field.h>
+
+#include <comm_quda.h>
+#include <host_utils.h>
+#include <command_line_params.h>
+#include <gauge_tools.h>
+
+#include <pgauge_monte.h>
+#include <random_quda.h>
+#include <unitarization_links.h>
+
+#include <qio_field.h>
+
+#include <gtest/gtest.h>
+
+using namespace quda;
+
+class GaugeAlgTest : public ::testing::Test
+{  
+ protected:
+
+  QudaGaugeParam param;
+  
+  Timer a0,a1;
+  double2 detu;
+  double3 plaq;
+  cudaGaugeField *U;
+  int nsteps;
+  int nhbsteps;
+  int novrsteps;
+  bool coldstart;
+  double beta_value;
+
+  RNG * randstates;
+
+  
+  void SetReunitarizationConsts(){
+    const double unitarize_eps = 1e-14;
+    const double max_error = 1e-10;
+    const int reunit_allow_svd = 1;
+    const int reunit_svd_only  = 0;
+    const double svd_rel_error = 1e-6;
+    const double svd_abs_error = 1e-6;
+    setUnitarizeLinksConstants(unitarize_eps, max_error,
+                               reunit_allow_svd, reunit_svd_only,
+                               svd_rel_error, svd_abs_error);
+
+  }
+
+  bool checkDimsPartitioned()
+  {
+    if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
+      return true;
+    return false;
+  }
+
+  bool comparePlaquette(double3 a, double3 b){
+    double a0,a1,a2;
+    a0 = std::abs(a.x - b.x);
+    a1 = std::abs(a.y - b.y);
+    a2 = std::abs(a.z - b.z);
+    double prec_val = 1.0e-5;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+    if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true;
+    return false;
+  }
+
+  bool CheckDeterminant(double2 detu){
+    double prec_val = 5e-8;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+    if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true;
+    return false;
+  }
+
+  virtual void SetUp() {
+    setVerbosity(QUDA_VERBOSE);
+
+    param = newQudaGaugeParam();
+
+    // Setup gauge container.
+    param.cpu_prec = prec;
+    param.cpu_prec = prec;
+    param.cuda_prec = prec;
+    param.reconstruct = link_recon;
+    param.cuda_prec_sloppy = prec;
+    param.reconstruct_sloppy = link_recon;
+
+    param.type = QUDA_WILSON_LINKS;
+    param.gauge_order = QUDA_MILC_GAUGE_ORDER;
+
+    param.X[0] = xdim;
+    param.X[1] = ydim;
+    param.X[2] = zdim;
+    param.X[3] = tdim;
+    setDims(param.X);
+
+    param.anisotropy = 1.0;  //don't support anisotropy for now!!!!!!
+    param.t_boundary = QUDA_PERIODIC_T;
+    param.gauge_fix = QUDA_GAUGE_FIXED_NO;
+    param.ga_pad = 0;
+
+    GaugeFieldParam gParam(0, param);
+    gParam.pad = 0;
+    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+    gParam.create      = QUDA_NULL_FIELD_CREATE;
+    gParam.link_type   = param.type;
+    gParam.reconstruct = param.reconstruct;
+    gParam.setPrecision(gParam.Precision(), true);
+
+#ifdef MULTI_GPU
+    int y[4];
+    int R[4] = {0,0,0,0};
+    for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
+    for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+    int pad = 0;
+    GaugeFieldParam gParamEx(y, prec, link_recon,
+                             pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+    gParamEx.create = QUDA_ZERO_FIELD_CREATE;
+    gParamEx.order = gParam.order;
+    gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
+    gParamEx.t_boundary = gParam.t_boundary;
+    gParamEx.nFace = 1;
+    for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
+    U = new cudaGaugeField(gParamEx);
+#else
+    U = new cudaGaugeField(gParam);
+#endif
+    // CURAND random generator initialization
+    randstates = new RNG(gParam, 1234);
+    randstates->Init();
+
+    nsteps = heatbath_num_steps;
+    nhbsteps = heatbath_num_heatbath_per_step;
+    novrsteps = heatbath_num_overrelax_per_step;
+    coldstart = heatbath_coldstart;
+    beta_value = heatbath_beta_value;
+
+    a0.Start(__func__, __FILE__, __LINE__);
+    a1.Start(__func__, __FILE__, __LINE__);
+
+    int *num_failures_h = (int *)mapped_malloc(sizeof(int));
+    int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
+
+    if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
+      InitGaugeField(*U);
+    else
+      InitGaugeField(*U, *randstates);
+
+    // Reunitarization setup
+    SetReunitarizationConsts();
+    plaquette(*U);
+
+    for(int step=1; step<=nsteps; ++step){
+      printfQuda("Step %d\n",step);
+      Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
+
+      //Reunitarize gauge links...
+      *num_failures_h = 0;
+      unitarizeLinks(*U, num_failures_d);
+      qudaDeviceSynchronize();
+      if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+
+      plaquette(*U);
+    }
+    a1.Stop(__func__, __FILE__, __LINE__);
+
+    printfQuda("Time Monte -> %.6f s\n", a1.Last());
+    plaq = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+
+    host_free(num_failures_h);
+  }
+
+  virtual void TearDown() {
+    detu = getLinkDeterminant(*U);
+    double2 tru = getLinkTrace(*U);
+    printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
+    printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0);
+
+    delete U;
+    //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+    PGaugeExchangeFree();
+
+    a0.Stop(__func__, __FILE__, __LINE__);
+    printfQuda("Time -> %.6f s\n", a0.Last());
+    randstates->Release();
+    delete randstates;
+  }
+};
+
+TEST_F(GaugeAlgTest, Generation)
+{
+  detu = getLinkDeterminant(*U);
+  ASSERT_TRUE(CheckDeterminant(detu));
+}
+
+TEST_F(GaugeAlgTest, Landau_Overrelaxation)
+{
+  printfQuda("Landau gauge fixing with overrelaxation\n");
+  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  auto plaq_gf = plaquette(*U);
+  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+}
+
+TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
+{
+  printfQuda("Coulomb gauge fixing with overrelaxation\n");
+  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  auto plaq_gf = plaquette(*U);
+  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+}
+
+TEST_F(GaugeAlgTest, Landau_FFT)
+{
+  if (!checkDimsPartitioned()) {
+    printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
+    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+    auto plaq_gf = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
+}
+
+TEST_F(GaugeAlgTest, Coulomb_FFT)
+{
+  if (!checkDimsPartitioned()) { // FFT gauge fixing is only exercised when no dimension is partitioned
+    printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
+    gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); // gauge_dir 3 = Coulomb (4 = Landau)
+    auto plaq_gf = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
+}
+
+int main(int argc, char **argv)
+{
+  // initalize google test, includes command line options
+  ::testing::InitGoogleTest(&argc, argv);
+  // return code for google test
+  int test_rc = 0;
+  xdim=ydim=zdim=tdim=32;
+
+  // command line options  
+  auto app = make_app();
+  add_gaugefix_option_group(app);
+  add_heatbath_option_group(app);
+  try {
+    app->parse(argc, argv);
+  } catch (const CLI::ParseError &e) {
+    return app->exit(e);
+  }
+
+  initComms(argc, argv, gridsize_from_cmdline);
+
+  // Ensure gtest prints only from rank 0
+  ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
+  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
+
+  initQuda(device_ordinal);
+  test_rc = RUN_ALL_TESTS();
+  endQuda();
+
+  finalizeComms();
+
+  return test_rc;
+}
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index 410d2304bd..5579460ab7 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -9,6 +9,7 @@
 #include <comm_quda.h>
 #include <host_utils.h>
 #include <command_line_params.h>
+#include <misc.h>
 #include <gauge_tools.h>
 
 #include <pgauge_monte.h>
@@ -21,254 +22,247 @@
 
 using namespace quda;
 
-class GaugeAlgTest : public ::testing::Test
-{  
- protected:
-
-  QudaGaugeParam param;
+void display_test_info()
+{
+  printfQuda("running the following test:\n");
   
-  Timer a0,a1;
-  double2 detu;
-  double3 plaq;
-  cudaGaugeField *U;
-  int nsteps;
-  int nhbsteps;
-  int novrsteps;
-  bool coldstart;
-  double beta_value;
+  printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
+  printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
+             get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
+             tdim, Lsdim);
+
+  printfQuda("Grid partition info:     X  Y  Z  T\n");
+  printfQuda("                         %d  %d  %d  %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2),
+             dimPartitioned(3));
+}
 
-  RNG * randstates;
 
+void SetReunitarizationConsts(){
+  const double unitarize_eps = 1e-14;
+  const double max_error = 1e-10;
+  const int reunit_allow_svd = 1;
+  const int reunit_svd_only  = 0;
+  const double svd_rel_error = 1e-6;
+  const double svd_abs_error = 1e-6;
+  setUnitarizeLinksConstants(unitarize_eps, max_error,
+			     reunit_allow_svd, reunit_svd_only,
+			     svd_rel_error, svd_abs_error);
   
-  void SetReunitarizationConsts(){
-    const double unitarize_eps = 1e-14;
-    const double max_error = 1e-10;
-    const int reunit_allow_svd = 1;
-    const int reunit_svd_only  = 0;
-    const double svd_rel_error = 1e-6;
-    const double svd_abs_error = 1e-6;
-    setUnitarizeLinksConstants(unitarize_eps, max_error,
-                               reunit_allow_svd, reunit_svd_only,
-                               svd_rel_error, svd_abs_error);
-
-  }
-
-  bool checkDimsPartitioned()
-  {
-    if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
-      return true;
-    return false;
-  }
-
-  bool comparePlaquette(double3 a, double3 b){
-    double a0,a1,a2;
-    a0 = std::abs(a.x - b.x);
-    a1 = std::abs(a.y - b.y);
-    a2 = std::abs(a.z - b.z);
-    double prec_val = 1.0e-5;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-    if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true;
-    return false;
-  }
-
-  bool CheckDeterminant(double2 detu){
-    double prec_val = 5e-8;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-    if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true;
-    return false;
-  }
-
-  virtual void SetUp() {
-    setVerbosity(QUDA_VERBOSE);
-
-    param = newQudaGaugeParam();
-
-    // Setup gauge container.
-    param.cpu_prec = prec;
-    param.cpu_prec = prec;
-    param.cuda_prec = prec;
-    param.reconstruct = link_recon;
-    param.cuda_prec_sloppy = prec;
-    param.reconstruct_sloppy = link_recon;
-
-    param.type = QUDA_WILSON_LINKS;
-    param.gauge_order = QUDA_MILC_GAUGE_ORDER;
-
-    param.X[0] = xdim;
-    param.X[1] = ydim;
-    param.X[2] = zdim;
-    param.X[3] = tdim;
-    setDims(param.X);
-
-    param.anisotropy = 1.0;  //don't support anisotropy for now!!!!!!
-    param.t_boundary = QUDA_PERIODIC_T;
-    param.gauge_fix = QUDA_GAUGE_FIXED_NO;
-    param.ga_pad = 0;
-
-    GaugeFieldParam gParam(0, param);
-    gParam.pad = 0;
-    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-    gParam.create      = QUDA_NULL_FIELD_CREATE;
-    gParam.link_type   = param.type;
-    gParam.reconstruct = param.reconstruct;
-    gParam.setPrecision(gParam.Precision(), true);
-
-#ifdef MULTI_GPU
-    int y[4];
-    int R[4] = {0,0,0,0};
-    for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
-    for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
-    int pad = 0;
-    GaugeFieldParam gParamEx(y, prec, link_recon,
-                             pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
-    gParamEx.create = QUDA_ZERO_FIELD_CREATE;
-    gParamEx.order = gParam.order;
-    gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
-    gParamEx.t_boundary = gParam.t_boundary;
-    gParamEx.nFace = 1;
-    for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
-    U = new cudaGaugeField(gParamEx);
-#else
-    U = new cudaGaugeField(gParam);
-#endif
-    // CURAND random generator initialization
-    randstates = new RNG(gParam, 1234);
-    randstates->Init();
-
-    nsteps = heatbath_num_steps;
-    nhbsteps = heatbath_num_heatbath_per_step;
-    novrsteps = heatbath_num_overrelax_per_step;
-    coldstart = heatbath_coldstart;
-    beta_value = heatbath_beta_value;
-
-    a0.Start(__func__, __FILE__, __LINE__);
-    a1.Start(__func__, __FILE__, __LINE__);
-
-    int *num_failures_h = (int *)mapped_malloc(sizeof(int));
-    int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-
-    if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
-      InitGaugeField(*U);
-    else
-      InitGaugeField(*U, *randstates);
-
-    // Reunitarization setup
-    SetReunitarizationConsts();
-    plaquette(*U);
-
-    for(int step=1; step<=nsteps; ++step){
-      printfQuda("Step %d\n",step);
-      Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
-
-      //Reunitarize gauge links...
-      *num_failures_h = 0;
-      unitarizeLinks(*U, num_failures_d);
-      qudaDeviceSynchronize();
-      if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
-
-      plaquette(*U);
-    }
-    a1.Stop(__func__, __FILE__, __LINE__);
-
-    printfQuda("Time Monte -> %.6f s\n", a1.Last());
-    plaq = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-
-    host_free(num_failures_h);
-  }
-
-  virtual void TearDown() {
-    detu = getLinkDeterminant(*U);
-    double2 tru = getLinkTrace(*U);
-    printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
-    printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0);
-
-    delete U;
-    //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
-    PGaugeExchangeFree();
-
-    a0.Stop(__func__, __FILE__, __LINE__);
-    printfQuda("Time -> %.6f s\n", a0.Last());
-    randstates->Release();
-    delete randstates;
-  }
-};
-
-TEST_F(GaugeAlgTest, Generation)
-{
-  detu = getLinkDeterminant(*U);
-  ASSERT_TRUE(CheckDeterminant(detu));
-}
-
-TEST_F(GaugeAlgTest, Landau_Overrelaxation)
-{
-  printfQuda("Landau gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
-  auto plaq_gf = plaquette(*U);
-  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
 }
 
-TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
+bool checkDimsPartitioned()
 {
-  printfQuda("Coulomb gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
-  auto plaq_gf = plaquette(*U);
-  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
+    return true;
+  return false;
 }
 
-TEST_F(GaugeAlgTest, Landau_FFT)
+bool comparePlaquette(double3 a, double3 b)
 {
-  if (!checkDimsPartitioned()) {
-    printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-  }
+  printfQuda("Plaq:    %.16e, %.16e, %.16e\n", a.x, a.y, a.z);
+  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);   
+  double a0,a1,a2;
+  a0 = std::abs(a.x - b.x);
+  a1 = std::abs(a.y - b.y);
+  a2 = std::abs(a.z - b.z);
+  double prec_val = 1.0e-5;
+  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+  return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
 }
 
-TEST_F(GaugeAlgTest, Coulomb_FFT)
+bool checkDeterminant(double2 detu)
 {
-  if (!checkDimsPartitioned()) {
-    printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-  }
+  printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y);
+  double prec_val = 5e-8;
+  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+  return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val;
 }
 
 int main(int argc, char **argv)
 {
-  // initalize google test, includes command line options
-  ::testing::InitGoogleTest(&argc, argv);
-  // return code for google test
-  int test_rc = 0;
-  xdim=ydim=zdim=tdim=32;
-
   // command line options  
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
+  CLI::TransformPairs<int> test_type_map {{"OVR", 0}, {"FFT", 1}};
+  app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
     return app->exit(e);
   }
 
+  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
+  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
+
+  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
 
-  // Ensure gtest prints only from rank 0
-  ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
-  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
+  // call srand() with a rank-dependent seed
+  initRand();
+
+  display_test_info();
 
+  // initialize the QUDA library
   initQuda(device_ordinal);
-  test_rc = RUN_ALL_TESTS();
-  endQuda();
 
-  finalizeComms();
+  // *** QUDA parameters begin here.
+  setVerbosity(QUDA_VERBOSE);
+  QudaGaugeParam param = newQudaGaugeParam();
 
-  return test_rc;
+  double3 plaq;
+  cudaGaugeField *U;
+  int nsteps = heatbath_num_steps;
+  int nhbsteps = heatbath_num_heatbath_per_step;
+  int novrsteps = heatbath_num_overrelax_per_step;
+  bool coldstart = heatbath_coldstart;
+  double beta_value = heatbath_beta_value;
+  
+  RNG * randstates;
+  
+  // Setup gauge container.
+  param.cpu_prec = prec;
+  param.cpu_prec = prec;
+  param.cuda_prec = prec;
+  param.reconstruct = link_recon;
+  param.cuda_prec_sloppy = prec;
+  param.reconstruct_sloppy = link_recon;
+  
+  param.type = QUDA_WILSON_LINKS;
+  param.gauge_order = QUDA_MILC_GAUGE_ORDER;
+  
+  param.X[0] = xdim;
+  param.X[1] = ydim;
+  param.X[2] = zdim;
+  param.X[3] = tdim;
+  setDims(param.X);
+  
+  param.anisotropy = 1.0;  //don't support anisotropy for now!!!!!!
+  param.t_boundary = QUDA_PERIODIC_T;
+  param.gauge_fix = QUDA_GAUGE_FIXED_NO;
+  param.ga_pad = 0;
+  
+  GaugeFieldParam gParam(0, param);
+  gParam.pad = 0;
+  gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+  gParam.create      = QUDA_NULL_FIELD_CREATE;
+  gParam.link_type   = param.type;
+  gParam.reconstruct = param.reconstruct;
+  gParam.setPrecision(gParam.Precision(), true);
+  
+  int y[4];
+  int R[4] = {0,0,0,0};
+  for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
+  for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+  int pad = 0;
+  GaugeFieldParam gParamEx(y, prec, link_recon,
+			   pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+  gParamEx.create = QUDA_ZERO_FIELD_CREATE;
+  gParamEx.order = gParam.order;
+  gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
+  gParamEx.t_boundary = gParam.t_boundary;
+  gParamEx.nFace = 1;
+  for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
+  U = new cudaGaugeField(gParamEx);
+
+  // CURAND random generator initialization
+  randstates = new RNG(gParam, 1234);
+  randstates->Init();
+    
+  int *num_failures_h = (int *)mapped_malloc(sizeof(int));
+  int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
+  
+  if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
+    InitGaugeField(*U);
+  else
+    InitGaugeField(*U, *randstates);
+  
+  // Reunitarization setup
+  SetReunitarizationConsts();
+  plaquette(*U);
+  
+  for(int step=1; step<=nsteps; ++step){
+    printfQuda("Step %d\n",step);
+    Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
+    
+    //Reunitarize gauge links...
+    *num_failures_h = 0;
+    unitarizeLinks(*U, num_failures_d);
+    qudaDeviceSynchronize();
+    if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+    
+    plaquette(*U);
+  }
+  
+  plaq = plaquette(*U);
+  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+  
+  host_free(num_failures_h);
+
+  // Gauge Fixing Routines
+  //---------------------------------------------------------------------------
+  switch (test_type) {
+  case 0:  
+    printfQuda("%s gauge fixing with overrelaxation\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
+    gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+    comparePlaquette(plaq, plaquette(*U));
+    break;
+    
+  case 1:
+    if (!checkDimsPartitioned()) {
+      printfQuda("%s gauge fixing with steepest descent method with FFTs\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
+      gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+      comparePlaquette(plaq, plaquette(*U));
+    } else {
+      errorQuda("FFT gauge fixing not supported for multi GPU geometry");
+    }
+    break;
+    
+  default:
+    errorQuda("Unknown test type %d", test_type);
+  }
+
+  double2 link_trace = getLinkTrace(*U);
+  printfQuda("Tr: %.16e:%.16e\n", link_trace.x/3.0, link_trace.y/3.0);
+
+  // Save if output string is specified
+  if (strcmp(gauge_outfile,"")) {
+    
+    printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
+
+    QudaGaugeParam gauge_param = newQudaGaugeParam();
+    setWilsonGaugeParam(gauge_param);
+    
+    void *cpu_gauge[4];
+    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
+    
+    cudaGaugeField *gauge;
+    gauge = new cudaGaugeField(gParam);
+    
+    // copy into regular field
+    copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);    
+    saveGaugeFieldQuda((void*)cpu_gauge, (void*)gauge, &gauge_param);
+    
+    // Write to disk
+    write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char**)0);
+    
+    for (int dir = 0; dir<4; dir++) free(cpu_gauge[dir]);
+    delete gauge;
+  } else {
+    printfQuda("No output file specified.\n");
+  }  
+  
+  delete U;
+  
+  //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+  PGaugeExchangeFree();
+  
+  randstates->Release();
+  delete randstates;
+  
+  freeGaugeQuda();    
+  endQuda();
+  finalizeComms();
+  
+  return 0;
 }
diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp
index 86827d3096..1c0b75c3a8 100644
--- a/tests/su3_test.cpp
+++ b/tests/su3_test.cpp
@@ -114,34 +114,34 @@ int main(int argc, char **argv)
   // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
 
-  QudaGaugeParam gauge_param = newQudaGaugeParam();
-  if (prec_sloppy == QUDA_INVALID_PRECISION) 
-    prec_sloppy = prec;
-  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) 
-    link_recon_sloppy = link_recon;
+  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
+  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
+ 
+  initQuda(device_ordinal);
+  setVerbosity(verbosity);
+
+  // call srand() with a rank-dependent seed
+  initRand();
 
+  QudaGaugeParam gauge_param = newQudaGaugeParam();
   setGaugeParam(gauge_param);
   setDims(gauge_param.X);
-
+  
+  // All user inputs now defined
+  display_test_info();
+  
+  // *** QUDA parameters begin here.  
   void *gauge[4], *new_gauge[4];
-
   for (int dir = 0; dir < 4; dir++) {
     gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size);
     new_gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size);
   }
-
-  initQuda(device_ordinal);
-
-  setVerbosity(verbosity);
-
-  // call srand() with a rank-dependent seed
-  initRand();
-
+  
   constructHostGaugeField(gauge, gauge_param, argc, argv);
   // Load the gauge field to the device
   loadGaugeQuda((void *)gauge, &gauge_param);
   saveGaugeQuda(new_gauge, &gauge_param);
-
+  
   double plaq[3];
   plaqQuda(plaq);
   printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1],
@@ -149,9 +149,6 @@ int main(int argc, char **argv)
 
 #ifdef GPU_GAUGE_TOOLS
 
-  // All user inputs now defined
-  display_test_info();
-
   // Topological charge and gauge energy
   double q_charge_check = 0.0;
   // Size of floating point data

From d9bbbfa594b51645e3058ea009ea73cab12d70e1 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 30 Apr 2021 18:48:50 -0700
Subject: [PATCH 03/32] Move the gauge_alg_test to a ctest, make a new
 interface to the gauge fixing that allows for fine grained control and gauge
 IO.

---
 lib/interface_quda.cpp    |  87 ++++----
 tests/CMakeLists.txt      |   4 +
 tests/gauge_alg_ctest.cpp | 274 ++++++++++++++++++++++++
 tests/gauge_alg_test.cpp  | 428 +++++++++++++++++++-------------------
 tests/su3_test.cpp        |  35 ++--
 5 files changed, 543 insertions(+), 285 deletions(-)
 create mode 100644 tests/gauge_alg_ctest.cpp

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 3af70859d8..6638a32046 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -236,8 +236,8 @@ static TimeProfile profileMomAction("momActionQuda");
 static TimeProfile profileEnd("endQuda");
 
 //!< Profiler for GaugeFixing
-static TimeProfile GaugeFixFFTQuda("GaugeFixFFTQuda");
-static TimeProfile GaugeFixOVRQuda("GaugeFixOVRQuda");
+static TimeProfile profileGaugeFixFFT("gaugeFixFFTQuda");
+static TimeProfile profileGaugeFixOVR("gaugeFixOVRQuda");
 
 //!< Profiler for toal time spend between init and end
 static TimeProfile profileInit2End("initQuda-endQuda",false);
@@ -1535,6 +1535,8 @@ void endQuda(void)
     profileProject.Print();
     profilePhase.Print();
     profileMomAction.Print();
+    profileGaugeFixOVR.Print();
+    profileGaugeFixFFT.Print();
     profileEnd.Print();
 
     profileInit2End.Print();
@@ -5809,11 +5811,11 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
                               const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param,
                               double *timeinfo)
 {
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_TOTAL);
-
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL);
+  
   checkGaugeParam(param);
 
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_INIT);
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT);
   GaugeFieldParam gParam(gauge, *param);
   auto *cpuGauge = new cpuGaugeField(gParam);
 
@@ -5824,44 +5826,37 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
   gParam.setPrecision(gParam.Precision(), true);
   auto *cudaInGauge = new cudaGaugeField(gParam);
 
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_INIT);
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_H2D);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_INIT);
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_H2D);
 
-  ///if (!param->use_resident_gauge) {   // load fields onto the device
   cudaInGauge->loadCPUField(*cpuGauge);
- /* } else { // or use resident fields already present
-    if (!gaugePrecise) errorQuda("No resident gauge field allocated");
-    cudaInGauge = gaugePrecise;
-    gaugePrecise = nullptr;
-  } */
 
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_H2D);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_H2D);
 
   if (comm_size() == 1) {
     // perform the update
-    GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+    profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
     gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
                    stopWtheta);
-    GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+    profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE);
   } else {
-    cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, GaugeFixOVRQuda);
+    cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFixOVR);
 
-    // perform the update
-    GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+    // Perform the update
+    profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
     gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
                    stopWtheta);
-    GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+    profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE);
 
-    //HOW TO COPY BACK TO CPU: cudaInGaugeEx->cpuGauge
     copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
   }
-
-  // copy the gauge field back to the host
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_D2H);
+  
+  // Copy the gauge field back to the host
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_D2H);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_D2H);
 
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_TOTAL);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_TOTAL);
 
   if (param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
@@ -5871,9 +5866,9 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
   }
 
   if(timeinfo){
-    timeinfo[0] = GaugeFixOVRQuda.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = GaugeFixOVRQuda.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = GaugeFixOVRQuda.Last(QUDA_PROFILE_D2H);
+    timeinfo[0] = profileGaugeFixOVR.Last(QUDA_PROFILE_H2D);
+    timeinfo[1] = profileGaugeFixOVR.Last(QUDA_PROFILE_COMPUTE);
+    timeinfo[2] = profileGaugeFixOVR.Last(QUDA_PROFILE_D2H);
   }
 
   return 0;
@@ -5883,11 +5878,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \
   const unsigned int  stopWtheta, QudaGaugeParam* param , double* timeinfo)
 {
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_TOTAL);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_TOTAL);
 
   checkGaugeParam(param);
 
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_INIT);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_INIT);
 
   GaugeFieldParam gParam(gauge, *param);
   auto *cpuGauge = new cpuGaugeField(gParam);
@@ -5900,33 +5895,27 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   auto *cudaInGauge = new cudaGaugeField(gParam);
 
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_INIT);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT);
 
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_H2D);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D);
 
-  //if (!param->use_resident_gauge) {   // load fields onto the device
   cudaInGauge->loadCPUField(*cpuGauge);
-  /*} else { // or use resident fields already present
-    if (!gaugePrecise) errorQuda("No resident gauge field allocated");
-    cudaInGauge = gaugePrecise;
-    gaugePrecise = nullptr;
-  } */
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_H2D);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_H2D);
 
   // perform the update
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
 
   gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE);
 
   // copy the gauge field back to the host
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_D2H);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_D2H);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_D2H);
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_TOTAL);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_TOTAL);
 
   if (param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
@@ -5934,11 +5923,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   } else {
     delete cudaInGauge;
   }
-
+  
   if (timeinfo) {
-    timeinfo[0] = GaugeFixFFTQuda.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = GaugeFixFFTQuda.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = GaugeFixFFTQuda.Last(QUDA_PROFILE_D2H);
+    timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D);
+    timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE);
+    timeinfo[2] = profileGaugeFixFFT.Last(QUDA_PROFILE_D2H);
   }
 
   return 0;
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 7c357e67ac..ed9e9df62f 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -239,6 +239,10 @@ if(QUDA_GAUGE_ALG)
   add_executable(gauge_alg_test gauge_alg_test.cpp)
   target_link_libraries(gauge_alg_test ${TEST_LIBS})
   quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS)
+
+  add_executable(gauge_alg_ctest gauge_alg_ctest.cpp)
+  target_link_libraries(gauge_alg_ctest ${TEST_LIBS})
+  quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS)
   install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR})
 
   add_executable(heatbath_test heatbath_test.cpp)
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
new file mode 100644
index 0000000000..410d2304bd
--- /dev/null
+++ b/tests/gauge_alg_ctest.cpp
@@ -0,0 +1,274 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <quda.h>
+#include <quda_internal.h>
+#include <gauge_field.h>
+
+#include <comm_quda.h>
+#include <host_utils.h>
+#include <command_line_params.h>
+#include <gauge_tools.h>
+
+#include <pgauge_monte.h>
+#include <random_quda.h>
+#include <unitarization_links.h>
+
+#include <qio_field.h>
+
+#include <gtest/gtest.h>
+
+using namespace quda;
+
+class GaugeAlgTest : public ::testing::Test // fixture: SetUp() builds a gauge field with Monte(), TearDown() frees it
+{  
+ protected:
+
+  QudaGaugeParam param; // filled in by SetUp() from the command line globals
+  
+  Timer a0,a1; // a0: started in SetUp(), stopped in TearDown(); a1: times the Monte loop in SetUp()
+  double2 detu; // link determinant, written by TearDown() and by the Generation test
+  double3 plaq; // plaquette of the generated field, recorded at the end of SetUp()
+  cudaGaugeField *U; // allocated in SetUp(), deleted in TearDown()
+  int nsteps; // copied from heatbath_num_steps
+  int nhbsteps; // copied from heatbath_num_heatbath_per_step
+  int novrsteps; // copied from heatbath_num_overrelax_per_step
+  bool coldstart; // copied from heatbath_coldstart
+  double beta_value; // copied from heatbath_beta_value
+
+  RNG * randstates; // created with seed 1234 in SetUp(), released and deleted in TearDown()
+
+  
+  void SetReunitarizationConsts(){ // passes a fixed set of tolerances to setUnitarizeLinksConstants()
+    const double unitarize_eps = 1e-14;
+    const double max_error = 1e-10;
+    const int reunit_allow_svd = 1;
+    const int reunit_svd_only  = 0;
+    const double svd_rel_error = 1e-6;
+    const double svd_abs_error = 1e-6;
+    setUnitarizeLinksConstants(unitarize_eps, max_error,
+                               reunit_allow_svd, reunit_svd_only,
+                               svd_rel_error, svd_abs_error);
+
+  }
+
+  bool checkDimsPartitioned() // true if comm_dim_partitioned() is set for any of dimensions 0..3
+  {
+    if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
+      return true;
+    return false;
+  }
+
+  bool comparePlaquette(double3 a, double3 b){ // true if x, y and z each differ by less than the tolerance
+    double a0,a1,a2; // note: these locals shadow the Timer members a0 and a1
+    a0 = std::abs(a.x - b.x);
+    a1 = std::abs(a.y - b.y);
+    a2 = std::abs(a.z - b.z);
+    double prec_val = 1.0e-5; // tolerance when prec is not double
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+    if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true;
+    return false;
+  }
+
+  bool CheckDeterminant(double2 detu){ // true if detu.x is within tolerance of 1 and detu.y within tolerance of 0
+    double prec_val = 5e-8; // tolerance when prec is not double
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+    if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true;
+    return false;
+  }
+
+  virtual void SetUp() { // allocates U and the RNG, then runs nsteps Monte updates with reunitarization
+    setVerbosity(QUDA_VERBOSE);
+
+    param = newQudaGaugeParam();
+
+    // Setup gauge container.
+    param.cpu_prec = prec;
+    param.cpu_prec = prec;
+    param.cuda_prec = prec;
+    param.reconstruct = link_recon;
+    param.cuda_prec_sloppy = prec;
+    param.reconstruct_sloppy = link_recon;
+
+    param.type = QUDA_WILSON_LINKS;
+    param.gauge_order = QUDA_MILC_GAUGE_ORDER;
+
+    param.X[0] = xdim;
+    param.X[1] = ydim;
+    param.X[2] = zdim;
+    param.X[3] = tdim;
+    setDims(param.X);
+
+    param.anisotropy = 1.0;  // anisotropy is not supported for now
+    param.t_boundary = QUDA_PERIODIC_T;
+    param.gauge_fix = QUDA_GAUGE_FIXED_NO;
+    param.ga_pad = 0;
+
+    GaugeFieldParam gParam(0, param);
+    gParam.pad = 0;
+    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+    gParam.create      = QUDA_NULL_FIELD_CREATE;
+    gParam.link_type   = param.type;
+    gParam.reconstruct = param.reconstruct;
+    gParam.setPrecision(gParam.Precision(), true);
+
+#ifdef MULTI_GPU
+    int y[4]; // extended local dimensions: X[dir] plus a border of R[dir] on each side
+    int R[4] = {0,0,0,0}; // border width per dimension, 2 where the dimension is partitioned
+    for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
+    for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+    int pad = 0;
+    GaugeFieldParam gParamEx(y, prec, link_recon,
+                             pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+    gParamEx.create = QUDA_ZERO_FIELD_CREATE;
+    gParamEx.order = gParam.order;
+    gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
+    gParamEx.t_boundary = gParam.t_boundary;
+    gParamEx.nFace = 1;
+    for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
+    U = new cudaGaugeField(gParamEx);
+#else
+    U = new cudaGaugeField(gParam);
+#endif
+    // CURAND random generator initialization
+    randstates = new RNG(gParam, 1234);
+    randstates->Init();
+
+    nsteps = heatbath_num_steps;
+    nhbsteps = heatbath_num_heatbath_per_step;
+    novrsteps = heatbath_num_overrelax_per_step;
+    coldstart = heatbath_coldstart;
+    beta_value = heatbath_beta_value;
+
+    a0.Start(__func__, __FILE__, __LINE__);
+    a1.Start(__func__, __FILE__, __LINE__);
+
+    int *num_failures_h = (int *)mapped_malloc(sizeof(int)); // host-side unitarization failure counter
+    int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); // pointer handed to unitarizeLinks()
+
+    if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) // the RNG-free init is used only for a cold start without reconstruct-8
+      InitGaugeField(*U);
+    else
+      InitGaugeField(*U, *randstates);
+
+    // Reunitarization setup
+    SetReunitarizationConsts();
+    plaquette(*U);
+
+    for(int step=1; step<=nsteps; ++step){
+      printfQuda("Step %d\n",step);
+      Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
+
+      // Reunitarize the gauge links after each Monte step
+      *num_failures_h = 0;
+      unitarizeLinks(*U, num_failures_d);
+      qudaDeviceSynchronize(); // synchronize before reading the failure counter on the host
+      if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+
+      plaquette(*U);
+    }
+    a1.Stop(__func__, __FILE__, __LINE__);
+
+    printfQuda("Time Monte -> %.6f s\n", a1.Last());
+    plaq = plaquette(*U); // reference value the gauge fixing tests compare against
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+
+    host_free(num_failures_h);
+  }
+
+  virtual void TearDown() { // prints determinant and trace of the final field, then frees U and the RNG
+    detu = getLinkDeterminant(*U);
+    double2 tru = getLinkTrace(*U);
+    printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
+    printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0);
+
+    delete U;
+    //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+    PGaugeExchangeFree();
+
+    a0.Stop(__func__, __FILE__, __LINE__);
+    printfQuda("Time -> %.6f s\n", a0.Last());
+    randstates->Release();
+    delete randstates;
+  }
+};
+
+TEST_F(GaugeAlgTest, Generation) // the field generated in SetUp() must pass CheckDeterminant()
+{
+  detu = getLinkDeterminant(*U);
+  ASSERT_TRUE(CheckDeterminant(detu));
+}
+
+TEST_F(GaugeAlgTest, Landau_Overrelaxation) // plaquette must be unchanged by overrelaxation gauge fixing with gauge_dir 4
+{
+  printfQuda("Landau gauge fixing with overrelaxation\n");
+  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  auto plaq_gf = plaquette(*U); // plaquette after gauge fixing
+  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+}
+
+TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) // plaquette must be unchanged by overrelaxation gauge fixing with gauge_dir 3
+{
+  printfQuda("Coulomb gauge fixing with overrelaxation\n");
+  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  auto plaq_gf = plaquette(*U); // plaquette after gauge fixing
+  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+}
+
+TEST_F(GaugeAlgTest, Landau_FFT) // plaquette must be unchanged by FFT gauge fixing with gauge_dir 4
+{
+  if (!checkDimsPartitioned()) { // the FFT path is only exercised when no dimension is comm-partitioned
+    printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
+    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+    auto plaq_gf = plaquette(*U); // plaquette after gauge fixing
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
+}
+
+TEST_F(GaugeAlgTest, Coulomb_FFT) // Coulomb gauge fixing via the FFT method must leave the plaquette unchanged
+{
+  if (!checkDimsPartitioned()) { // the FFT path is only exercised when no dimension is comm-partitioned
+    printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
+    gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); // gauge_dir 3 = Coulomb (4 = Landau)
+    auto plaq_gf = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
+}
+
+int main(int argc, char **argv)
+{
+  // initialize google test, includes command line options
+  ::testing::InitGoogleTest(&argc, argv);
+  // return code for google test
+  int test_rc = 0;
+  xdim=ydim=zdim=tdim=32; // lattice extents assigned before the command line is parsed
+
+  // command line options  
+  auto app = make_app();
+  add_gaugefix_option_group(app);
+  add_heatbath_option_group(app);
+  try {
+    app->parse(argc, argv);
+  } catch (const CLI::ParseError &e) {
+    return app->exit(e); // exit with the parser's return code on a parse error
+  }
+
+  initComms(argc, argv, gridsize_from_cmdline);
+
+  // Ensure gtest prints only from rank 0
+  ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
+  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
+
+  initQuda(device_ordinal);
+  test_rc = RUN_ALL_TESTS(); // the tests run between initQuda() and endQuda()
+  endQuda();
+
+  finalizeComms();
+
+  return test_rc;
+}
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index 410d2304bd..5579460ab7 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -9,6 +9,7 @@
 #include <comm_quda.h>
 #include <host_utils.h>
 #include <command_line_params.h>
+#include <misc.h>
 #include <gauge_tools.h>
 
 #include <pgauge_monte.h>
@@ -21,254 +22,247 @@
 
 using namespace quda;
 
-class GaugeAlgTest : public ::testing::Test
-{  
- protected:
-
-  QudaGaugeParam param;
+void display_test_info()
+{
+  printfQuda("running the following test:\n");
   
-  Timer a0,a1;
-  double2 detu;
-  double3 plaq;
-  cudaGaugeField *U;
-  int nsteps;
-  int nhbsteps;
-  int novrsteps;
-  bool coldstart;
-  double beta_value;
+  printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
+  printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
+             get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
+             tdim, Lsdim);
+
+  printfQuda("Grid partition info:     X  Y  Z  T\n");
+  printfQuda("                         %d  %d  %d  %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2),
+             dimPartitioned(3));
+}
 
-  RNG * randstates;
 
+void SetReunitarizationConsts(){
+  const double unitarize_eps = 1e-14;
+  const double max_error = 1e-10;
+  const int reunit_allow_svd = 1;
+  const int reunit_svd_only  = 0;
+  const double svd_rel_error = 1e-6;
+  const double svd_abs_error = 1e-6;
+  setUnitarizeLinksConstants(unitarize_eps, max_error,
+			     reunit_allow_svd, reunit_svd_only,
+			     svd_rel_error, svd_abs_error);
   
-  void SetReunitarizationConsts(){
-    const double unitarize_eps = 1e-14;
-    const double max_error = 1e-10;
-    const int reunit_allow_svd = 1;
-    const int reunit_svd_only  = 0;
-    const double svd_rel_error = 1e-6;
-    const double svd_abs_error = 1e-6;
-    setUnitarizeLinksConstants(unitarize_eps, max_error,
-                               reunit_allow_svd, reunit_svd_only,
-                               svd_rel_error, svd_abs_error);
-
-  }
-
-  bool checkDimsPartitioned()
-  {
-    if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
-      return true;
-    return false;
-  }
-
-  bool comparePlaquette(double3 a, double3 b){
-    double a0,a1,a2;
-    a0 = std::abs(a.x - b.x);
-    a1 = std::abs(a.y - b.y);
-    a2 = std::abs(a.z - b.z);
-    double prec_val = 1.0e-5;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-    if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true;
-    return false;
-  }
-
-  bool CheckDeterminant(double2 detu){
-    double prec_val = 5e-8;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-    if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true;
-    return false;
-  }
-
-  virtual void SetUp() {
-    setVerbosity(QUDA_VERBOSE);
-
-    param = newQudaGaugeParam();
-
-    // Setup gauge container.
-    param.cpu_prec = prec;
-    param.cpu_prec = prec;
-    param.cuda_prec = prec;
-    param.reconstruct = link_recon;
-    param.cuda_prec_sloppy = prec;
-    param.reconstruct_sloppy = link_recon;
-
-    param.type = QUDA_WILSON_LINKS;
-    param.gauge_order = QUDA_MILC_GAUGE_ORDER;
-
-    param.X[0] = xdim;
-    param.X[1] = ydim;
-    param.X[2] = zdim;
-    param.X[3] = tdim;
-    setDims(param.X);
-
-    param.anisotropy = 1.0;  //don't support anisotropy for now!!!!!!
-    param.t_boundary = QUDA_PERIODIC_T;
-    param.gauge_fix = QUDA_GAUGE_FIXED_NO;
-    param.ga_pad = 0;
-
-    GaugeFieldParam gParam(0, param);
-    gParam.pad = 0;
-    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-    gParam.create      = QUDA_NULL_FIELD_CREATE;
-    gParam.link_type   = param.type;
-    gParam.reconstruct = param.reconstruct;
-    gParam.setPrecision(gParam.Precision(), true);
-
-#ifdef MULTI_GPU
-    int y[4];
-    int R[4] = {0,0,0,0};
-    for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
-    for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
-    int pad = 0;
-    GaugeFieldParam gParamEx(y, prec, link_recon,
-                             pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
-    gParamEx.create = QUDA_ZERO_FIELD_CREATE;
-    gParamEx.order = gParam.order;
-    gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
-    gParamEx.t_boundary = gParam.t_boundary;
-    gParamEx.nFace = 1;
-    for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
-    U = new cudaGaugeField(gParamEx);
-#else
-    U = new cudaGaugeField(gParam);
-#endif
-    // CURAND random generator initialization
-    randstates = new RNG(gParam, 1234);
-    randstates->Init();
-
-    nsteps = heatbath_num_steps;
-    nhbsteps = heatbath_num_heatbath_per_step;
-    novrsteps = heatbath_num_overrelax_per_step;
-    coldstart = heatbath_coldstart;
-    beta_value = heatbath_beta_value;
-
-    a0.Start(__func__, __FILE__, __LINE__);
-    a1.Start(__func__, __FILE__, __LINE__);
-
-    int *num_failures_h = (int *)mapped_malloc(sizeof(int));
-    int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-
-    if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
-      InitGaugeField(*U);
-    else
-      InitGaugeField(*U, *randstates);
-
-    // Reunitarization setup
-    SetReunitarizationConsts();
-    plaquette(*U);
-
-    for(int step=1; step<=nsteps; ++step){
-      printfQuda("Step %d\n",step);
-      Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
-
-      //Reunitarize gauge links...
-      *num_failures_h = 0;
-      unitarizeLinks(*U, num_failures_d);
-      qudaDeviceSynchronize();
-      if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
-
-      plaquette(*U);
-    }
-    a1.Stop(__func__, __FILE__, __LINE__);
-
-    printfQuda("Time Monte -> %.6f s\n", a1.Last());
-    plaq = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-
-    host_free(num_failures_h);
-  }
-
-  virtual void TearDown() {
-    detu = getLinkDeterminant(*U);
-    double2 tru = getLinkTrace(*U);
-    printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
-    printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0);
-
-    delete U;
-    //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
-    PGaugeExchangeFree();
-
-    a0.Stop(__func__, __FILE__, __LINE__);
-    printfQuda("Time -> %.6f s\n", a0.Last());
-    randstates->Release();
-    delete randstates;
-  }
-};
-
-TEST_F(GaugeAlgTest, Generation)
-{
-  detu = getLinkDeterminant(*U);
-  ASSERT_TRUE(CheckDeterminant(detu));
-}
-
-TEST_F(GaugeAlgTest, Landau_Overrelaxation)
-{
-  printfQuda("Landau gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
-  auto plaq_gf = plaquette(*U);
-  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
 }
 
-TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
+bool checkDimsPartitioned()
 {
-  printfQuda("Coulomb gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
-  auto plaq_gf = plaquette(*U);
-  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
+    return true;
+  return false;
 }
 
-TEST_F(GaugeAlgTest, Landau_FFT)
+bool comparePlaquette(double3 a, double3 b)
 {
-  if (!checkDimsPartitioned()) {
-    printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-  }
+  printfQuda("Plaq:    %.16e, %.16e, %.16e\n", a.x, a.y, a.z);
+  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);   
+  double a0,a1,a2;
+  a0 = std::abs(a.x - b.x);
+  a1 = std::abs(a.y - b.y);
+  a2 = std::abs(a.z - b.z);
+  double prec_val = 1.0e-5;
+  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+  return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
 }
 
-TEST_F(GaugeAlgTest, Coulomb_FFT)
+bool checkDeterminant(double2 detu)
 {
-  if (!checkDimsPartitioned()) {
-    printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-  }
+  printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y);
+  double prec_val = 5e-8;
+  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+  return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val;
 }
 
 int main(int argc, char **argv)
 {
-  // initalize google test, includes command line options
-  ::testing::InitGoogleTest(&argc, argv);
-  // return code for google test
-  int test_rc = 0;
-  xdim=ydim=zdim=tdim=32;
-
   // command line options  
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
+  CLI::TransformPairs<int> test_type_map {{"OVR", 0}, {"FFT", 1}};
+  app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
     return app->exit(e);
   }
 
+  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
+  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
+
+  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
 
-  // Ensure gtest prints only from rank 0
-  ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
-  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
+  // call srand() with a rank-dependent seed
+  initRand();
+
+  display_test_info();
 
+  // initialize the QUDA library
   initQuda(device_ordinal);
-  test_rc = RUN_ALL_TESTS();
-  endQuda();
 
-  finalizeComms();
+  // *** QUDA parameters begin here.
+  setVerbosity(QUDA_VERBOSE);
+  QudaGaugeParam param = newQudaGaugeParam();
 
-  return test_rc;
+  double3 plaq;
+  cudaGaugeField *U;
+  int nsteps = heatbath_num_steps;
+  int nhbsteps = heatbath_num_heatbath_per_step;
+  int novrsteps = heatbath_num_overrelax_per_step;
+  bool coldstart = heatbath_coldstart;
+  double beta_value = heatbath_beta_value;
+  
+  RNG * randstates;
+  
+  // Setup gauge container.
+  param.cpu_prec = prec;
+  param.cpu_prec = prec;
+  param.cuda_prec = prec;
+  param.reconstruct = link_recon;
+  param.cuda_prec_sloppy = prec;
+  param.reconstruct_sloppy = link_recon;
+  
+  param.type = QUDA_WILSON_LINKS;
+  param.gauge_order = QUDA_MILC_GAUGE_ORDER;
+  
+  param.X[0] = xdim;
+  param.X[1] = ydim;
+  param.X[2] = zdim;
+  param.X[3] = tdim;
+  setDims(param.X);
+  
+  param.anisotropy = 1.0;  //don't support anisotropy for now!!!!!!
+  param.t_boundary = QUDA_PERIODIC_T;
+  param.gauge_fix = QUDA_GAUGE_FIXED_NO;
+  param.ga_pad = 0;
+  
+  GaugeFieldParam gParam(0, param);
+  gParam.pad = 0;
+  gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+  gParam.create      = QUDA_NULL_FIELD_CREATE;
+  gParam.link_type   = param.type;
+  gParam.reconstruct = param.reconstruct;
+  gParam.setPrecision(gParam.Precision(), true);
+  
+  int y[4];
+  int R[4] = {0,0,0,0};
+  for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
+  for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+  int pad = 0;
+  GaugeFieldParam gParamEx(y, prec, link_recon,
+			   pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+  gParamEx.create = QUDA_ZERO_FIELD_CREATE;
+  gParamEx.order = gParam.order;
+  gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
+  gParamEx.t_boundary = gParam.t_boundary;
+  gParamEx.nFace = 1;
+  for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
+  U = new cudaGaugeField(gParamEx);
+
+  // CURAND random generator initialization
+  randstates = new RNG(gParam, 1234);
+  randstates->Init();
+    
+  int *num_failures_h = (int *)mapped_malloc(sizeof(int));
+  int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
+  
+  if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
+    InitGaugeField(*U);
+  else
+    InitGaugeField(*U, *randstates);
+  
+  // Reunitarization setup
+  SetReunitarizationConsts();
+  plaquette(*U);
+  
+  for(int step=1; step<=nsteps; ++step){
+    printfQuda("Step %d\n",step);
+    Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
+    
+    //Reunitarize gauge links...
+    *num_failures_h = 0;
+    unitarizeLinks(*U, num_failures_d);
+    qudaDeviceSynchronize();
+    if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+    
+    plaquette(*U);
+  }
+  
+  plaq = plaquette(*U);
+  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+  
+  host_free(num_failures_h);
+
+  // Gauge Fixing Routines
+  //---------------------------------------------------------------------------
+  switch (test_type) {
+  case 0:  
+    printfQuda("%s gauge fixing with overrelaxation\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
+    gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+    comparePlaquette(plaq, plaquette(*U));
+    break;
+    
+  case 1:
+    if (!checkDimsPartitioned()) {
+      printfQuda("%s gauge fixing with steepest descent method with FFTs\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
+      gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+      comparePlaquette(plaq, plaquette(*U));
+    } else {
+      errorQuda("FFT gauge fixing not supported for multi GPU geometry");
+    }
+    break;
+    
+  default:
+    errorQuda("Unknown test type %d", test_type);
+  }
+
+  double2 link_trace = getLinkTrace(*U);
+  printfQuda("Tr: %.16e:%.16e\n", link_trace.x/3.0, link_trace.y/3.0);
+
+  // Save if output string is specified
+  if (strcmp(gauge_outfile,"")) {
+    
+    printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
+
+    QudaGaugeParam gauge_param = newQudaGaugeParam();
+    setWilsonGaugeParam(gauge_param);
+    
+    void *cpu_gauge[4];
+    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
+    
+    cudaGaugeField *gauge;
+    gauge = new cudaGaugeField(gParam);
+    
+    // copy into regular field
+    copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);    
+    saveGaugeFieldQuda((void*)cpu_gauge, (void*)gauge, &gauge_param);
+    
+    // Write to disk
+    write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char**)0);
+    
+    for (int dir = 0; dir<4; dir++) free(cpu_gauge[dir]);
+    delete gauge;
+  } else {
+    printfQuda("No output file specified.\n");
+  }  
+  
+  delete U;
+  
+  //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+  PGaugeExchangeFree();
+  
+  randstates->Release();
+  delete randstates;
+  
+  freeGaugeQuda();    
+  endQuda();
+  finalizeComms();
+  
+  return 0;
 }
diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp
index 86827d3096..1c0b75c3a8 100644
--- a/tests/su3_test.cpp
+++ b/tests/su3_test.cpp
@@ -114,34 +114,34 @@ int main(int argc, char **argv)
   // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
 
-  QudaGaugeParam gauge_param = newQudaGaugeParam();
-  if (prec_sloppy == QUDA_INVALID_PRECISION) 
-    prec_sloppy = prec;
-  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) 
-    link_recon_sloppy = link_recon;
+  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
+  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
+ 
+  initQuda(device_ordinal);
+  setVerbosity(verbosity);
+
+  // call srand() with a rank-dependent seed
+  initRand();
 
+  QudaGaugeParam gauge_param = newQudaGaugeParam();
   setGaugeParam(gauge_param);
   setDims(gauge_param.X);
-
+  
+  // All user inputs now defined
+  display_test_info();
+  
+  // *** QUDA parameters begin here.  
   void *gauge[4], *new_gauge[4];
-
   for (int dir = 0; dir < 4; dir++) {
     gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size);
     new_gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size);
   }
-
-  initQuda(device_ordinal);
-
-  setVerbosity(verbosity);
-
-  // call srand() with a rank-dependent seed
-  initRand();
-
+  
   constructHostGaugeField(gauge, gauge_param, argc, argv);
   // Load the gauge field to the device
   loadGaugeQuda((void *)gauge, &gauge_param);
   saveGaugeQuda(new_gauge, &gauge_param);
-
+  
   double plaq[3];
   plaqQuda(plaq);
   printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1],
@@ -149,9 +149,6 @@ int main(int argc, char **argv)
 
 #ifdef GPU_GAUGE_TOOLS
 
-  // All user inputs now defined
-  display_test_info();
-
   // Topological charge and gauge energy
   double q_charge_check = 0.0;
   // Size of floating point data

From 9aa3aca72682e24716248722041cf3dc67ee77c6 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 30 Apr 2021 18:54:13 -0700
Subject: [PATCH 04/32] clang tidy

---
 lib/interface_quda.cpp              |   7 +-
 tests/gauge_alg_ctest.cpp           |  80 +++++++++--------
 tests/gauge_alg_test.cpp            | 130 ++++++++++++++--------------
 tests/heatbath_test.cpp             |   2 +-
 tests/su3_test.cpp                  |  12 +--
 tests/utils/command_line_params.cpp |  38 ++++----
 6 files changed, 135 insertions(+), 134 deletions(-)

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 6638a32046..f89d899496 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5812,7 +5812,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
                               double *timeinfo)
 {
   profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL);
-  
+
   checkGaugeParam(param);
 
   profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT);
@@ -5850,7 +5850,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
 
     copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
   }
-  
+
   // Copy the gauge field back to the host
   profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
@@ -5894,7 +5894,6 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   gParam.setPrecision(gParam.Precision(), true);
   auto *cudaInGauge = new cudaGaugeField(gParam);
 
-
   profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT);
 
   profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D);
@@ -5923,7 +5922,7 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   } else {
     delete cudaInGauge;
   }
-  
+
   if (timeinfo) {
     timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D);
     timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE);
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index 410d2304bd..cdaa2efd9a 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -22,12 +22,11 @@
 using namespace quda;
 
 class GaugeAlgTest : public ::testing::Test
-{  
- protected:
-
+{
+protected:
   QudaGaugeParam param;
-  
-  Timer a0,a1;
+
+  Timer a0, a1;
   double2 detu;
   double3 plaq;
   cudaGaugeField *U;
@@ -37,20 +36,17 @@ class GaugeAlgTest : public ::testing::Test
   bool coldstart;
   double beta_value;
 
-  RNG * randstates;
+  RNG *randstates;
 
-  
-  void SetReunitarizationConsts(){
+  void SetReunitarizationConsts()
+  {
     const double unitarize_eps = 1e-14;
     const double max_error = 1e-10;
     const int reunit_allow_svd = 1;
-    const int reunit_svd_only  = 0;
+    const int reunit_svd_only = 0;
     const double svd_rel_error = 1e-6;
     const double svd_abs_error = 1e-6;
-    setUnitarizeLinksConstants(unitarize_eps, max_error,
-                               reunit_allow_svd, reunit_svd_only,
-                               svd_rel_error, svd_abs_error);
-
+    setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error);
   }
 
   bool checkDimsPartitioned()
@@ -60,8 +56,9 @@ class GaugeAlgTest : public ::testing::Test
     return false;
   }
 
-  bool comparePlaquette(double3 a, double3 b){
-    double a0,a1,a2;
+  bool comparePlaquette(double3 a, double3 b)
+  {
+    double a0, a1, a2;
     a0 = std::abs(a.x - b.x);
     a1 = std::abs(a.y - b.y);
     a2 = std::abs(a.z - b.z);
@@ -71,14 +68,16 @@ class GaugeAlgTest : public ::testing::Test
     return false;
   }
 
-  bool CheckDeterminant(double2 detu){
+  bool CheckDeterminant(double2 detu)
+  {
     double prec_val = 5e-8;
     if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
     if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true;
     return false;
   }
 
-  virtual void SetUp() {
+  virtual void SetUp()
+  {
     setVerbosity(QUDA_VERBOSE);
 
     param = newQudaGaugeParam();
@@ -100,7 +99,7 @@ class GaugeAlgTest : public ::testing::Test
     param.X[3] = tdim;
     setDims(param.X);
 
-    param.anisotropy = 1.0;  //don't support anisotropy for now!!!!!!
+    param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
     param.t_boundary = QUDA_PERIODIC_T;
     param.gauge_fix = QUDA_GAUGE_FIXED_NO;
     param.ga_pad = 0;
@@ -108,25 +107,25 @@ class GaugeAlgTest : public ::testing::Test
     GaugeFieldParam gParam(0, param);
     gParam.pad = 0;
     gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-    gParam.create      = QUDA_NULL_FIELD_CREATE;
-    gParam.link_type   = param.type;
+    gParam.create = QUDA_NULL_FIELD_CREATE;
+    gParam.link_type = param.type;
     gParam.reconstruct = param.reconstruct;
     gParam.setPrecision(gParam.Precision(), true);
 
 #ifdef MULTI_GPU
     int y[4];
-    int R[4] = {0,0,0,0};
-    for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
-    for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+    int R[4] = {0, 0, 0, 0};
+    for (int dir = 0; dir < 4; ++dir)
+      if (comm_dim_partitioned(dir)) R[dir] = 2;
+    for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
     int pad = 0;
-    GaugeFieldParam gParamEx(y, prec, link_recon,
-                             pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+    GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
     gParamEx.create = QUDA_ZERO_FIELD_CREATE;
     gParamEx.order = gParam.order;
     gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
     gParamEx.t_boundary = gParam.t_boundary;
     gParamEx.nFace = 1;
-    for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
+    for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir];
     U = new cudaGaugeField(gParamEx);
 #else
     U = new cudaGaugeField(gParam);
@@ -156,11 +155,11 @@ class GaugeAlgTest : public ::testing::Test
     SetReunitarizationConsts();
     plaquette(*U);
 
-    for(int step=1; step<=nsteps; ++step){
-      printfQuda("Step %d\n",step);
+    for (int step = 1; step <= nsteps; ++step) {
+      printfQuda("Step %d\n", step);
       Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
 
-      //Reunitarize gauge links...
+      // Reunitarize gauge links...
       *num_failures_h = 0;
       unitarizeLinks(*U, num_failures_d);
       qudaDeviceSynchronize();
@@ -177,14 +176,15 @@ class GaugeAlgTest : public ::testing::Test
     host_free(num_failures_h);
   }
 
-  virtual void TearDown() {
+  virtual void TearDown()
+  {
     detu = getLinkDeterminant(*U);
     double2 tru = getLinkTrace(*U);
     printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
-    printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0);
+    printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
 
     delete U;
-    //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+    // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
     PGaugeExchangeFree();
 
     a0.Stop(__func__, __FILE__, __LINE__);
@@ -203,7 +203,8 @@ TEST_F(GaugeAlgTest, Generation)
 TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
   printfQuda("Landau gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+                 gf_theta_condition);
   auto plaq_gf = plaquette(*U);
   printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
   ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -212,7 +213,8 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
   printfQuda("Coulomb gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+                 gf_theta_condition);
   auto plaq_gf = plaquette(*U);
   printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
   ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -222,7 +224,8 @@ TEST_F(GaugeAlgTest, Landau_FFT)
 {
   if (!checkDimsPartitioned()) {
     printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+                   gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -233,7 +236,8 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
 {
   if (!checkDimsPartitioned()) {
     printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+    gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+                   gf_theta_condition); // gauge dir 3 = Coulomb (4 is Landau, as used by Landau_FFT)
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -246,9 +250,9 @@ int main(int argc, char **argv)
   ::testing::InitGoogleTest(&argc, argv);
   // return code for google test
   int test_rc = 0;
-  xdim=ydim=zdim=tdim=32;
+  xdim = ydim = zdim = tdim = 32;
 
-  // command line options  
+  // command line options
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index 5579460ab7..ad7985c464 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -25,7 +25,7 @@ using namespace quda;
 void display_test_info()
 {
   printfQuda("running the following test:\n");
-  
+
   printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
   printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
              get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
@@ -36,18 +36,15 @@ void display_test_info()
              dimPartitioned(3));
 }
 
-
-void SetReunitarizationConsts(){
+void SetReunitarizationConsts()
+{
   const double unitarize_eps = 1e-14;
   const double max_error = 1e-10;
   const int reunit_allow_svd = 1;
-  const int reunit_svd_only  = 0;
+  const int reunit_svd_only = 0;
   const double svd_rel_error = 1e-6;
   const double svd_abs_error = 1e-6;
-  setUnitarizeLinksConstants(unitarize_eps, max_error,
-			     reunit_allow_svd, reunit_svd_only,
-			     svd_rel_error, svd_abs_error);
-  
+  setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error);
 }
 
 bool checkDimsPartitioned()
@@ -60,8 +57,8 @@ bool checkDimsPartitioned()
 bool comparePlaquette(double3 a, double3 b)
 {
   printfQuda("Plaq:    %.16e, %.16e, %.16e\n", a.x, a.y, a.z);
-  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);   
-  double a0,a1,a2;
+  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);
+  double a0, a1, a2;
   a0 = std::abs(a.x - b.x);
   a1 = std::abs(a.y - b.y);
   a2 = std::abs(a.z - b.z);
@@ -80,7 +77,7 @@ bool checkDeterminant(double2 detu)
 
 int main(int argc, char **argv)
 {
-  // command line options  
+  // command line options
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
@@ -117,9 +114,9 @@ int main(int argc, char **argv)
   int novrsteps = heatbath_num_overrelax_per_step;
   bool coldstart = heatbath_coldstart;
   double beta_value = heatbath_beta_value;
-  
-  RNG * randstates;
-  
+
+  RNG *randstates;
+
   // Setup gauge container.
   param.cpu_prec = prec;
   param.cpu_prec = prec;
@@ -127,142 +124,143 @@ int main(int argc, char **argv)
   param.reconstruct = link_recon;
   param.cuda_prec_sloppy = prec;
   param.reconstruct_sloppy = link_recon;
-  
+
   param.type = QUDA_WILSON_LINKS;
   param.gauge_order = QUDA_MILC_GAUGE_ORDER;
-  
+
   param.X[0] = xdim;
   param.X[1] = ydim;
   param.X[2] = zdim;
   param.X[3] = tdim;
   setDims(param.X);
-  
-  param.anisotropy = 1.0;  //don't support anisotropy for now!!!!!!
+
+  param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
   param.t_boundary = QUDA_PERIODIC_T;
   param.gauge_fix = QUDA_GAUGE_FIXED_NO;
   param.ga_pad = 0;
-  
+
   GaugeFieldParam gParam(0, param);
   gParam.pad = 0;
   gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-  gParam.create      = QUDA_NULL_FIELD_CREATE;
-  gParam.link_type   = param.type;
+  gParam.create = QUDA_NULL_FIELD_CREATE;
+  gParam.link_type = param.type;
   gParam.reconstruct = param.reconstruct;
   gParam.setPrecision(gParam.Precision(), true);
-  
+
   int y[4];
-  int R[4] = {0,0,0,0};
-  for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
-  for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+  int R[4] = {0, 0, 0, 0};
+  for (int dir = 0; dir < 4; ++dir)
+    if (comm_dim_partitioned(dir)) R[dir] = 2;
+  for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
   int pad = 0;
-  GaugeFieldParam gParamEx(y, prec, link_recon,
-			   pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+  GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
   gParamEx.create = QUDA_ZERO_FIELD_CREATE;
   gParamEx.order = gParam.order;
   gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
   gParamEx.t_boundary = gParam.t_boundary;
   gParamEx.nFace = 1;
-  for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
+  for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir];
   U = new cudaGaugeField(gParamEx);
 
   // CURAND random generator initialization
   randstates = new RNG(gParam, 1234);
   randstates->Init();
-    
+
   int *num_failures_h = (int *)mapped_malloc(sizeof(int));
   int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-  
+
   if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
     InitGaugeField(*U);
   else
     InitGaugeField(*U, *randstates);
-  
+
   // Reunitarization setup
   SetReunitarizationConsts();
   plaquette(*U);
-  
-  for(int step=1; step<=nsteps; ++step){
-    printfQuda("Step %d\n",step);
+
+  for (int step = 1; step <= nsteps; ++step) {
+    printfQuda("Step %d\n", step);
     Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
-    
-    //Reunitarize gauge links...
+
+    // Reunitarize gauge links...
     *num_failures_h = 0;
     unitarizeLinks(*U, num_failures_d);
     qudaDeviceSynchronize();
     if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
-    
+
     plaquette(*U);
   }
-  
+
   plaq = plaquette(*U);
   printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-  
+
   host_free(num_failures_h);
 
   // Gauge Fixing Routines
   //---------------------------------------------------------------------------
   switch (test_type) {
-  case 0:  
+  case 0:
     printfQuda("%s gauge fixing with overrelaxation\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
-    gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+    gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
+                   gf_reunit_interval, gf_theta_condition);
     comparePlaquette(plaq, plaquette(*U));
     break;
-    
+
   case 1:
     if (!checkDimsPartitioned()) {
       printfQuda("%s gauge fixing with steepest descent method with FFTs\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
-      gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+      gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+                     gf_theta_condition);
       comparePlaquette(plaq, plaquette(*U));
     } else {
       errorQuda("FFT gauge fixing not supported for multi GPU geometry");
     }
     break;
-    
-  default:
-    errorQuda("Unknown test type %d", test_type);
+
+  default: errorQuda("Unknown test type %d", test_type);
   }
 
   double2 link_trace = getLinkTrace(*U);
-  printfQuda("Tr: %.16e:%.16e\n", link_trace.x/3.0, link_trace.y/3.0);
+  printfQuda("Tr: %.16e:%.16e\n", link_trace.x / 3.0, link_trace.y / 3.0);
 
   // Save if output string is specified
-  if (strcmp(gauge_outfile,"")) {
-    
+  if (strcmp(gauge_outfile, "")) {
+
     printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
 
     QudaGaugeParam gauge_param = newQudaGaugeParam();
     setWilsonGaugeParam(gauge_param);
-    
+
     void *cpu_gauge[4];
     for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
-    
+
     cudaGaugeField *gauge;
     gauge = new cudaGaugeField(gParam);
-    
+
     // copy into regular field
-    copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);    
-    saveGaugeFieldQuda((void*)cpu_gauge, (void*)gauge, &gauge_param);
-    
+    copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
+    saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
+
     // Write to disk
-    write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char**)0);
-    
-    for (int dir = 0; dir<4; dir++) free(cpu_gauge[dir]);
+    write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0);
+
+    for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]);
     delete gauge;
   } else {
     printfQuda("No output file specified.\n");
-  }  
-  
+  }
+
   delete U;
-  
-  //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+
+  // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
   PGaugeExchangeFree();
-  
+
   randstates->Release();
   delete randstates;
-  
-  freeGaugeQuda();    
+
+  freeGaugeQuda();
   endQuda();
   finalizeComms();
-  
+
   return 0;
 }
diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp
index a0734f1516..b11d569ad6 100644
--- a/tests/heatbath_test.cpp
+++ b/tests/heatbath_test.cpp
@@ -53,7 +53,7 @@ void display_test_info()
 }
 
 int main(int argc, char **argv)
-{  
+{
   // command line options
   auto app = make_app();
   add_heatbath_option_group(app);
diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp
index 1c0b75c3a8..6cec875339 100644
--- a/tests/su3_test.cpp
+++ b/tests/su3_test.cpp
@@ -116,7 +116,7 @@ int main(int argc, char **argv)
 
   if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
   if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
- 
+
   initQuda(device_ordinal);
   setVerbosity(verbosity);
 
@@ -126,22 +126,22 @@ int main(int argc, char **argv)
   QudaGaugeParam gauge_param = newQudaGaugeParam();
   setGaugeParam(gauge_param);
   setDims(gauge_param.X);
-  
+
   // All user inputs now defined
   display_test_info();
-  
-  // *** QUDA parameters begin here.  
+
+  // *** QUDA parameters begin here.
   void *gauge[4], *new_gauge[4];
   for (int dir = 0; dir < 4; dir++) {
     gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size);
     new_gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size);
   }
-  
+
   constructHostGaugeField(gauge, gauge_param, argc, argv);
   // Load the gauge field to the device
   loadGaugeQuda((void *)gauge, &gauge_param);
   saveGaugeQuda(new_gauge, &gauge_param);
-  
+
   double plaq[3];
   plaqQuda(plaq);
   printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1],
diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp
index b0e312b69d..efe22d8fe0 100644
--- a/tests/utils/command_line_params.cpp
+++ b/tests/utils/command_line_params.cpp
@@ -1020,41 +1020,41 @@ void add_heatbath_option_group(std::shared_ptr<QUDAApp> quda_app)
   auto opgroup = quda_app->add_option_group("heatbath", "Options controlling heatbath tests");
   opgroup->add_option("--heatbath-beta", heatbath_beta_value, "Beta value used in heatbath test (default 6.2)");
   opgroup->add_option("--heatbath-coldstart", heatbath_coldstart,
-                       "Whether to use a cold or hot start in heatbath test (default false)");
+                      "Whether to use a cold or hot start in heatbath test (default false)");
   opgroup->add_option("--heatbath-num-hb-per-step", heatbath_num_heatbath_per_step,
-                       "Number of heatbath hits per heatbath step (default 5)");
+                      "Number of heatbath hits per heatbath step (default 5)");
   opgroup->add_option("--heatbath-num-or-per-step", heatbath_num_overrelax_per_step,
-                       "Number of overrelaxation hits per heatbath step (default 5)");
+                      "Number of overrelaxation hits per heatbath step (default 5)");
   opgroup->add_option("--heatbath-num-steps", heatbath_num_steps,
-                       "Number of measurement steps in heatbath test (default 10)");
+                      "Number of measurement steps in heatbath test (default 10)");
   opgroup->add_option("--heatbath-warmup-steps", heatbath_warmup_steps,
-                       "Number of warmup steps in heatbath test (default 10)");
+                      "Number of warmup steps in heatbath test (default 10)");
 }
 
 void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
 {
   // Option group for gauge fixing related options
   auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests");
-  opgroup->add_option("--gf-dir", gf_gauge_dir, "The orthogonal direction of teh gauge fixing, 3=Coulomb, 4=Landau. (default 4)");
+  opgroup->add_option("--gf-dir", gf_gauge_dir,
+                      "The orthogonal direction of teh gauge fixing, 3=Coulomb, 4=Landau. (default 4)");
   opgroup->add_option("--gf-maxiter", gf_maxiter,
-                       "The maximun number of gauge fixing iterations to be applied (default 10000) ");
+                      "The maximun number of gauge fixing iterations to be applied (default 10000) ");
   opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval,
-                       "Print the gauge fixing progress every N steps (default 100)");
+                      "Print the gauge fixing progress every N steps (default 100)");
   opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost,
-                       "The overrelaxation boost parameter for the overrelaxation method (default 1.5)");
-  opgroup->add_option("--gf-fft-alpha", gf_fft_alpha,
-                       "The Alpha parameter in the FFT method (default 0.8)");
+                      "The overrelaxation boost parameter for the overrelaxation method (default 1.5)");
+  opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)");
   opgroup->add_option("--gf-reunit-interval", gf_reunit_interval,
-                       "Reunitarise the gauge field every N steps (default 10)");
-  opgroup->add_option("--gf-tol", gf_tolerance,
-                       "The tolerance of the gauge fixing quality (default 1e-6)");
-  opgroup->add_option("--gf-theta-condition", gf_theta_condition,
-                       "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
-  opgroup->add_option("--gf-fft-autotune", gf_fft_autotune,
-		       "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)");
+                      "Reunitarise the gauge field every N steps (default 10)");
+  opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)");
+  opgroup->add_option(
+    "--gf-theta-condition", gf_theta_condition,
+    "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
+  opgroup->add_option(
+    "--gf-fft-autotune", gf_fft_autotune,
+    "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)");
 }
 
-
 void add_comms_option_group(std::shared_ptr<QUDAApp> quda_app)
 {
   auto opgroup

From b34ef65737439678d92fb58d2e6f6b6aa3363bfc Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Sat, 1 May 2021 17:59:56 -0700
Subject: [PATCH 05/32] Allow for single case testing in gauge_alg_ctest, minor
 clean up of gauge fixing stdout and comments

---
 include/gauge_tools.h         |   4 +-
 include/quda.h                |   4 +-
 include/quda_milc_interface.h |   4 +-
 lib/gauge_fix_fft.cu          |  17 +-
 lib/gauge_fix_ovr.cu          |   9 +-
 tests/CMakeLists.txt          |   8 +-
 tests/gauge_alg_ctest.cpp     | 443 ++++++++++++++++++++++------------
 tests/gauge_alg_test.cpp      | 266 --------------------
 8 files changed, 313 insertions(+), 442 deletions(-)
 delete mode 100644 tests/gauge_alg_test.cpp

diff --git a/include/gauge_tools.h b/include/gauge_tools.h
index 9c1654f483..7b9e39b5b9 100644
--- a/include/gauge_tools.h
+++ b/include/gauge_tools.h
@@ -120,7 +120,7 @@ namespace quda
    * value is zero then the method stops when iteration reachs the
    * maximum number of steps defined by Nsteps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value
+   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    */
   void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval,
                       const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta);
@@ -136,7 +136,7 @@ namespace quda
    * @param[in] tolerance, torelance value to stop the method, if this
    * value is zero then the method stops when iteration reachs the
    * maximum number of steps defined by Nsteps
-   * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value
+   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    */
   void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval,
                       const double alpha, const int autotune, const double tolerance, const int stopWtheta);
diff --git a/include/quda.h b/include/quda.h
index bbf55f037c..3151ca667e 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -1502,7 +1502,7 @@ extern "C" {
    * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
    * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value
+   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in] param The parameters of the external fields and the computation settings
    * @param[out] timeinfo
    */
@@ -1520,7 +1520,7 @@ extern "C" {
    * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
    * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when
    * iteration reachs the maximum number of steps defined by Nsteps
-   * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value
+   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in] param The parameters of the external fields and the computation settings
    * @param[out] timeinfo
    */
diff --git a/include/quda_milc_interface.h b/include/quda_milc_interface.h
index fc9e54b151..cd5a95794c 100644
--- a/include/quda_milc_interface.h
+++ b/include/quda_milc_interface.h
@@ -963,7 +963,7 @@ extern "C" {
    * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
    * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value
+   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in,out] milc_sitelink, MILC gauge field to be fixed
    */
   void qudaGaugeFixingOVR( const int precision,
@@ -987,7 +987,7 @@ extern "C" {
    * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
    * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
    * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
-   * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value
+   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in,out] milc_sitelink, MILC gauge field to be fixed
    */
   void qudaGaugeFixingFFT( int precision,
diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu
index 2466000c46..a2d4f9c633 100644
--- a/lib/gauge_fix_fft.cu
+++ b/lib/gauge_fix_fft.cu
@@ -635,16 +635,13 @@ namespace quda {
     profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
 
     Float alpha = alpha0;
-    std::cout << "\tAlpha parameter of the Steepest Descent Method: " << alpha << std::endl;
-    if ( autotune ) std::cout << "\tAuto tune active: yes" << std::endl;
-    else std::cout << "\tAuto tune active: no" << std::endl;
-    std::cout << "\tStop criterium: " << tolerance << std::endl;
-    if ( stopWtheta ) std::cout << "\tStop criterium method: theta" << std::endl;
-    else std::cout << "\tStop criterium method: Delta" << std::endl;
-    std::cout << "\tMaximum number of iterations: " << Nsteps << std::endl;
-    std::cout << "\tPrint convergence results at every " << verbose_interval << " steps" << std::endl;
-
-
+    printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha);
+    printfQuda("\tAuto tune active: %s\n", autotune ? "yes" : "no");
+    printfQuda("\tStop criterion: %e\n", tolerance);
+    printfQuda("\tStop criterion method: %s\n", stopWtheta ? "theta" : "delta");
+    printfQuda("\tMaximum number of iterations: %d\n", Nsteps);
+    printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval);
+    
     unsigned int delta_pad = data.X()[0] * data.X()[1] * data.X()[2] * data.X()[3];
     int4 size = make_int4( data.X()[0], data.X()[1], data.X()[2], data.X()[3] );
     cufftHandle plan_xy;
diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu
index 93793bbf61..1f3d6485f3 100644
--- a/lib/gauge_fix_ovr.cu
+++ b/lib/gauge_fix_ovr.cu
@@ -1071,13 +1071,12 @@ public:
     double byte = 0;
 
     printfQuda("\tOverrelaxation boost parameter: %lf\n", (double)relax_boost);
-    printfQuda("\tStop criterium: %lf\n", tolerance);
-    if ( stopWtheta ) printfQuda("\tStop criterium method: theta\n");
-    else printfQuda("\tStop criterium method: Delta\n");
+    printfQuda("\tStop criterion: %lf\n", tolerance);
+    printfQuda("\tStop criterion method: %s\n", stopWtheta ? "theta" : "delta");
     printfQuda("\tMaximum number of iterations: %d\n", Nsteps);
     printfQuda("\tReunitarize at every %d steps\n", reunit_interval);
     printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval);
-
+    
     const double unitarize_eps = 1e-14;
     const double max_error = 1e-10;
     const int reunit_allow_svd = 1;
@@ -1398,7 +1397,7 @@ public:
    * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
    * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value
+   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    */
   void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost,
                       const double tolerance, const int reunit_interval, const int stopWtheta) {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index ed9e9df62f..560785ae2d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -236,14 +236,10 @@ if(QUDA_FORCE_GAUGE)
 endif()
 
 if(QUDA_GAUGE_ALG)
-  add_executable(gauge_alg_test gauge_alg_test.cpp)
-  target_link_libraries(gauge_alg_test ${TEST_LIBS})
-  quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS)
-
   add_executable(gauge_alg_ctest gauge_alg_ctest.cpp)
   target_link_libraries(gauge_alg_ctest ${TEST_LIBS})
   quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS)
-  install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR})
+  install(TARGETS gauge_alg_ctest ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR})
 
   add_executable(heatbath_test heatbath_test.cpp)
   target_link_libraries(heatbath_test ${TEST_LIBS})
@@ -809,7 +805,7 @@ foreach(prec IN LISTS TEST_PRECS)
 
   if(QUDA_GAUGE_ALG)
     add_test(NAME gauge_alg_${prec}
-             COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg_test> ${MPIEXEC_POSTFLAGS}
+             COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg_ctest> ${MPIEXEC_POSTFLAGS}
                      --dim 2 4 6 8 --prec ${prec}
                      --gtest_output=xml:gauge_arg_test_${prec}.xml)
   endif()
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index cdaa2efd9a..881d53bb76 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -9,6 +9,7 @@
 #include <comm_quda.h>
 #include <host_utils.h>
 #include <command_line_params.h>
+#include <misc.h>
 #include <gauge_tools.h>
 
 #include <pgauge_monte.h>
@@ -21,6 +22,42 @@
 
 using namespace quda;
 
+//***********************************************************//
+// This boolean controls whether or not the full Google test //
+// is done. If the user passes a value of 1 or 2 to --test   //
+// then a single instance of OVR or FFT gauge fixing is done //
+// and the value of this bool is set to false. Otherwise the //
+// Google tests are performed.                               //
+//***********************************************************//
+bool execute = true;
+
+void display_test_info()
+{
+  printfQuda("running the following test:\n");
+
+  switch (test_type) {
+  case 0:
+    printfQuda("\n Google testing\n");
+    break;
+  case 1:
+    printfQuda("\nOVR gauge fix\n");
+    break;
+  case 2:
+    printfQuda("\nFFT gauge fix\n");
+    break;
+  default: errorQuda("Undefined test type %d given", test_type);
+  }
+  
+  printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
+  printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
+             get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
+             tdim, Lsdim);
+
+  printfQuda("Grid partition info:     X  Y  Z  T\n");
+  printfQuda("                         %d  %d  %d  %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2),
+             dimPartitioned(3));
+}
+
 class GaugeAlgTest : public ::testing::Test
 {
 protected:
@@ -35,7 +72,9 @@ class GaugeAlgTest : public ::testing::Test
   int novrsteps;
   bool coldstart;
   double beta_value;
-
+  
+  bool unit_test;
+  
   RNG *randstates;
 
   void SetReunitarizationConsts()
@@ -64,45 +103,195 @@ class GaugeAlgTest : public ::testing::Test
     a2 = std::abs(a.z - b.z);
     double prec_val = 1.0e-5;
     if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-    if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true;
-    return false;
+    return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
   }
 
   bool CheckDeterminant(double2 detu)
   {
     double prec_val = 5e-8;
     if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-    if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true;
-    return false;
+    return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val);
   }
 
   virtual void SetUp()
   {
-    setVerbosity(QUDA_VERBOSE);
-
-    param = newQudaGaugeParam();
-
-    // Setup gauge container.
-    param.cpu_prec = prec;
-    param.cpu_prec = prec;
-    param.cuda_prec = prec;
-    param.reconstruct = link_recon;
-    param.cuda_prec_sloppy = prec;
-    param.reconstruct_sloppy = link_recon;
-
-    param.type = QUDA_WILSON_LINKS;
-    param.gauge_order = QUDA_MILC_GAUGE_ORDER;
-
-    param.X[0] = xdim;
-    param.X[1] = ydim;
-    param.X[2] = zdim;
-    param.X[3] = tdim;
-    setDims(param.X);
+    if(execute) {
+      setVerbosity(QUDA_VERBOSE);
+      param = newQudaGaugeParam();
+      
+      // Setup gauge container.
+      param.cpu_prec = prec;
+      param.cpu_prec = prec;
+      param.cuda_prec = prec;
+      param.reconstruct = link_recon;
+      param.cuda_prec_sloppy = prec;
+      param.reconstruct_sloppy = link_recon;
+      
+      param.type = QUDA_WILSON_LINKS;
+      param.gauge_order = QUDA_MILC_GAUGE_ORDER;
+      
+      param.X[0] = xdim;
+      param.X[1] = ydim;
+      param.X[2] = zdim;
+      param.X[3] = tdim;
+      setDims(param.X);
+      
+      param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
+      param.t_boundary = QUDA_PERIODIC_T;
+      param.gauge_fix = QUDA_GAUGE_FIXED_NO;
+      param.ga_pad = 0;
+      
+      GaugeFieldParam gParam(0, param);
+      gParam.pad = 0;
+      gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+      gParam.create = QUDA_NULL_FIELD_CREATE;
+      gParam.link_type = param.type;
+      gParam.reconstruct = param.reconstruct;
+      gParam.setPrecision(gParam.Precision(), true);
+      
+#ifdef MULTI_GPU
+      int y[4];
+      int R[4] = {0, 0, 0, 0};
+      for (int dir = 0; dir < 4; ++dir)
+	if (comm_dim_partitioned(dir)) R[dir] = 2;
+      for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+      int pad = 0;
+      GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+      gParamEx.create = QUDA_ZERO_FIELD_CREATE;
+      gParamEx.order = gParam.order;
+      gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
+      gParamEx.t_boundary = gParam.t_boundary;
+      gParamEx.nFace = 1;
+      for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir];
+      U = new cudaGaugeField(gParamEx);
+#else
+      U = new cudaGaugeField(gParam);
+#endif
+      // CURAND random generator initialization
+      randstates = new RNG(gParam, 1234);
+      randstates->Init();
+      
+      nsteps = heatbath_num_steps;
+      nhbsteps = heatbath_num_heatbath_per_step;
+      novrsteps = heatbath_num_overrelax_per_step;
+      coldstart = heatbath_coldstart;
+      beta_value = heatbath_beta_value;
+      
+      a0.Start(__func__, __FILE__, __LINE__);
+      a1.Start(__func__, __FILE__, __LINE__);
+      
+      int *num_failures_h = (int *)mapped_malloc(sizeof(int));
+      int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
+      
+      if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
+	InitGaugeField(*U);
+      else
+	InitGaugeField(*U, *randstates);
+      
+      // Reunitarization setup
+      SetReunitarizationConsts();
+      plaquette(*U);
+      
+      for (int step = 1; step <= nsteps; ++step) {
+	printfQuda("Step %d\n", step);
+	Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
+	
+	// Reunitarize gauge links...
+	*num_failures_h = 0;
+	unitarizeLinks(*U, num_failures_d);
+	qudaDeviceSynchronize();
+	if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+	
+	plaquette(*U);
+      }
+      a1.Stop(__func__, __FILE__, __LINE__);
+      
+      printfQuda("Time Monte -> %.6f s\n", a1.Last());
+      plaq = plaquette(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+      
+      host_free(num_failures_h);
+
+      // If a specific test type is requested, perform it now and then
+      // turn off all Google tests in the tear down.
+      switch (test_type) {
+      case 0:
+	// Do the Google testing
+	break;
+      case 1:
+	run_ovr();
+	break;
+      case 2:
+	run_fft();
+	break;    
+      default:
+	errorQuda("Invalid test type %d ", test_type);
+      }
+    }
+  }
+  
+  virtual void TearDown()
+  {
+    if(execute) {
+      detu = getLinkDeterminant(*U);
+      double2 tru = getLinkTrace(*U);
+      printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
+      printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
+      
+      delete U;
+      // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+      PGaugeExchangeFree();
+      
+      a0.Stop(__func__, __FILE__, __LINE__);
+      printfQuda("Time -> %.6f s\n", a0.Last());
+      randstates->Release();
+      delete randstates;      
+    }
+    // If we performed a specific instance, switch off the
+    // Google testing.
+    if(test_type != 0) execute = false;
+  }
+  
+  virtual void run_ovr()
+  {
+    if(execute) {
+      gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+		     gf_theta_condition);
+      auto plaq_gf = plaquette(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+      // Save if output string is specified
+      if (strcmp(gauge_outfile, "")) save_gauge();
+    }
+  }
+  virtual void run_fft()
+  {
+    if(execute) {
+      if (!checkDimsPartitioned()) {
+	printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
+	gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+		       gf_theta_condition);
+	
+	auto plaq_gf = plaquette(*U);	
+	printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+	ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+	// Save if output string is specified
+	if (strcmp(gauge_outfile, "")) save_gauge();
+      } else {
+	errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
+      }
+    }
+  }
 
-    param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
-    param.t_boundary = QUDA_PERIODIC_T;
-    param.gauge_fix = QUDA_GAUGE_FIXED_NO;
-    param.ga_pad = 0;
+  virtual void save_gauge() {
+    
+    printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
+    
+    QudaGaugeParam gauge_param = newQudaGaugeParam();
+    setWilsonGaugeParam(gauge_param);
+    
+    void *cpu_gauge[4];
+    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
 
     GaugeFieldParam gParam(0, param);
     gParam.pad = 0;
@@ -111,168 +300,124 @@ class GaugeAlgTest : public ::testing::Test
     gParam.link_type = param.type;
     gParam.reconstruct = param.reconstruct;
     gParam.setPrecision(gParam.Precision(), true);
-
-#ifdef MULTI_GPU
-    int y[4];
-    int R[4] = {0, 0, 0, 0};
-    for (int dir = 0; dir < 4; ++dir)
-      if (comm_dim_partitioned(dir)) R[dir] = 2;
-    for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
-    int pad = 0;
-    GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
-    gParamEx.create = QUDA_ZERO_FIELD_CREATE;
-    gParamEx.order = gParam.order;
-    gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
-    gParamEx.t_boundary = gParam.t_boundary;
-    gParamEx.nFace = 1;
-    for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir];
-    U = new cudaGaugeField(gParamEx);
-#else
-    U = new cudaGaugeField(gParam);
-#endif
-    // CURAND random generator initialization
-    randstates = new RNG(gParam, 1234);
-    randstates->Init();
-
-    nsteps = heatbath_num_steps;
-    nhbsteps = heatbath_num_heatbath_per_step;
-    novrsteps = heatbath_num_overrelax_per_step;
-    coldstart = heatbath_coldstart;
-    beta_value = heatbath_beta_value;
-
-    a0.Start(__func__, __FILE__, __LINE__);
-    a1.Start(__func__, __FILE__, __LINE__);
-
-    int *num_failures_h = (int *)mapped_malloc(sizeof(int));
-    int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-
-    if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
-      InitGaugeField(*U);
-    else
-      InitGaugeField(*U, *randstates);
-
-    // Reunitarization setup
-    SetReunitarizationConsts();
-    plaquette(*U);
-
-    for (int step = 1; step <= nsteps; ++step) {
-      printfQuda("Step %d\n", step);
-      Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
-
-      // Reunitarize gauge links...
-      *num_failures_h = 0;
-      unitarizeLinks(*U, num_failures_d);
-      qudaDeviceSynchronize();
-      if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
-
-      plaquette(*U);
-    }
-    a1.Stop(__func__, __FILE__, __LINE__);
-
-    printfQuda("Time Monte -> %.6f s\n", a1.Last());
-    plaq = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-
-    host_free(num_failures_h);
-  }
-
-  virtual void TearDown()
-  {
-    detu = getLinkDeterminant(*U);
-    double2 tru = getLinkTrace(*U);
-    printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
-    printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
-
-    delete U;
-    // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
-    PGaugeExchangeFree();
-
-    a0.Stop(__func__, __FILE__, __LINE__);
-    printfQuda("Time -> %.6f s\n", a0.Last());
-    randstates->Release();
-    delete randstates;
+  
+    cudaGaugeField *gauge;
+    gauge = new cudaGaugeField(gParam);
+    
+    // copy into regular field
+    copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
+    saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
+      
+    // Write to disk
+    write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0);
+    
+    for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]);
+    delete gauge;
   }
 };
-
+  
 TEST_F(GaugeAlgTest, Generation)
 {
-  detu = getLinkDeterminant(*U);
-  ASSERT_TRUE(CheckDeterminant(detu));
+  if(execute) {
+    detu = getLinkDeterminant(*U);
+    ASSERT_TRUE(CheckDeterminant(detu));
+  }
 }
 
 TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
-  printfQuda("Landau gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                 gf_theta_condition);
-  auto plaq_gf = plaquette(*U);
-  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if(execute) {
+    printfQuda("Landau gauge fixing with overrelaxation\n");
+    gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+		   gf_theta_condition);
+    auto plaq_gf = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
 }
 
 TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
-  printfQuda("Coulomb gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                 gf_theta_condition);
-  auto plaq_gf = plaquette(*U);
-  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if(execute) {
+    printfQuda("Coulomb gauge fixing with overrelaxation\n");
+    gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+		   gf_theta_condition);
+    auto plaq_gf = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
 }
 
 TEST_F(GaugeAlgTest, Landau_FFT)
 {
-  if (!checkDimsPartitioned()) {
-    printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                   gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if(execute) {
+    if (!checkDimsPartitioned()) {
+      printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
+      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+		     gf_theta_condition);
+      auto plaq_gf = plaquette(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+    }
   }
 }
 
 TEST_F(GaugeAlgTest, Coulomb_FFT)
 {
-  if (!checkDimsPartitioned()) {
-    printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                   gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if(execute) {
+    if (!checkDimsPartitioned()) {
+      printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
+      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+		     gf_theta_condition);
+      auto plaq_gf = plaquette(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+    }
   }
 }
 
 int main(int argc, char **argv)
 {
-  // initalize google test, includes command line options
-  ::testing::InitGoogleTest(&argc, argv);
-  // return code for google test
-  int test_rc = 0;
-  xdim = ydim = zdim = tdim = 32;
-
   // command line options
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
+  
+  test_type = 0;
+  CLI::TransformPairs<int> test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}};
+  app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
     return app->exit(e);
   }
 
+  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
+  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
+
+  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)  
   initComms(argc, argv, gridsize_from_cmdline);
 
+  // call srand() with a rank-dependent seed
+  initRand();
+  
+  display_test_info();
+
+  // initialize the QUDA library
+  initQuda(device_ordinal);
+
+  // initalize google test, includes command line options
+  ::testing::InitGoogleTest(&argc, argv);
+
   // Ensure gtest prints only from rank 0
   ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
   if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
 
-  initQuda(device_ordinal);
-  test_rc = RUN_ALL_TESTS();
-  endQuda();
-
+  // return code for google test
+  int test_rc = RUN_ALL_TESTS();
+  
+  endQuda();  
   finalizeComms();
-
+  
   return test_rc;
 }
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
deleted file mode 100644
index ad7985c464..0000000000
--- a/tests/gauge_alg_test.cpp
+++ /dev/null
@@ -1,266 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <quda.h>
-#include <quda_internal.h>
-#include <gauge_field.h>
-
-#include <comm_quda.h>
-#include <host_utils.h>
-#include <command_line_params.h>
-#include <misc.h>
-#include <gauge_tools.h>
-
-#include <pgauge_monte.h>
-#include <random_quda.h>
-#include <unitarization_links.h>
-
-#include <qio_field.h>
-
-#include <gtest/gtest.h>
-
-using namespace quda;
-
-void display_test_info()
-{
-  printfQuda("running the following test:\n");
-
-  printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
-  printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
-             get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
-             tdim, Lsdim);
-
-  printfQuda("Grid partition info:     X  Y  Z  T\n");
-  printfQuda("                         %d  %d  %d  %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2),
-             dimPartitioned(3));
-}
-
-void SetReunitarizationConsts()
-{
-  const double unitarize_eps = 1e-14;
-  const double max_error = 1e-10;
-  const int reunit_allow_svd = 1;
-  const int reunit_svd_only = 0;
-  const double svd_rel_error = 1e-6;
-  const double svd_abs_error = 1e-6;
-  setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error);
-}
-
-bool checkDimsPartitioned()
-{
-  if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
-    return true;
-  return false;
-}
-
-bool comparePlaquette(double3 a, double3 b)
-{
-  printfQuda("Plaq:    %.16e, %.16e, %.16e\n", a.x, a.y, a.z);
-  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);
-  double a0, a1, a2;
-  a0 = std::abs(a.x - b.x);
-  a1 = std::abs(a.y - b.y);
-  a2 = std::abs(a.z - b.z);
-  double prec_val = 1.0e-5;
-  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-  return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
-}
-
-bool checkDeterminant(double2 detu)
-{
-  printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y);
-  double prec_val = 5e-8;
-  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-  return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val;
-}
-
-int main(int argc, char **argv)
-{
-  // command line options
-  auto app = make_app();
-  add_gaugefix_option_group(app);
-  add_heatbath_option_group(app);
-  CLI::TransformPairs<int> test_type_map {{"OVR", 0}, {"FFT", 1}};
-  app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
-  try {
-    app->parse(argc, argv);
-  } catch (const CLI::ParseError &e) {
-    return app->exit(e);
-  }
-
-  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
-  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
-
-  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
-  initComms(argc, argv, gridsize_from_cmdline);
-
-  // call srand() with a rank-dependent seed
-  initRand();
-
-  display_test_info();
-
-  // initialize the QUDA library
-  initQuda(device_ordinal);
-
-  // *** QUDA parameters begin here.
-  setVerbosity(QUDA_VERBOSE);
-  QudaGaugeParam param = newQudaGaugeParam();
-
-  double3 plaq;
-  cudaGaugeField *U;
-  int nsteps = heatbath_num_steps;
-  int nhbsteps = heatbath_num_heatbath_per_step;
-  int novrsteps = heatbath_num_overrelax_per_step;
-  bool coldstart = heatbath_coldstart;
-  double beta_value = heatbath_beta_value;
-
-  RNG *randstates;
-
-  // Setup gauge container.
-  param.cpu_prec = prec;
-  param.cpu_prec = prec;
-  param.cuda_prec = prec;
-  param.reconstruct = link_recon;
-  param.cuda_prec_sloppy = prec;
-  param.reconstruct_sloppy = link_recon;
-
-  param.type = QUDA_WILSON_LINKS;
-  param.gauge_order = QUDA_MILC_GAUGE_ORDER;
-
-  param.X[0] = xdim;
-  param.X[1] = ydim;
-  param.X[2] = zdim;
-  param.X[3] = tdim;
-  setDims(param.X);
-
-  param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
-  param.t_boundary = QUDA_PERIODIC_T;
-  param.gauge_fix = QUDA_GAUGE_FIXED_NO;
-  param.ga_pad = 0;
-
-  GaugeFieldParam gParam(0, param);
-  gParam.pad = 0;
-  gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-  gParam.create = QUDA_NULL_FIELD_CREATE;
-  gParam.link_type = param.type;
-  gParam.reconstruct = param.reconstruct;
-  gParam.setPrecision(gParam.Precision(), true);
-
-  int y[4];
-  int R[4] = {0, 0, 0, 0};
-  for (int dir = 0; dir < 4; ++dir)
-    if (comm_dim_partitioned(dir)) R[dir] = 2;
-  for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
-  int pad = 0;
-  GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
-  gParamEx.create = QUDA_ZERO_FIELD_CREATE;
-  gParamEx.order = gParam.order;
-  gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
-  gParamEx.t_boundary = gParam.t_boundary;
-  gParamEx.nFace = 1;
-  for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir];
-  U = new cudaGaugeField(gParamEx);
-
-  // CURAND random generator initialization
-  randstates = new RNG(gParam, 1234);
-  randstates->Init();
-
-  int *num_failures_h = (int *)mapped_malloc(sizeof(int));
-  int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-
-  if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
-    InitGaugeField(*U);
-  else
-    InitGaugeField(*U, *randstates);
-
-  // Reunitarization setup
-  SetReunitarizationConsts();
-  plaquette(*U);
-
-  for (int step = 1; step <= nsteps; ++step) {
-    printfQuda("Step %d\n", step);
-    Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
-
-    // Reunitarize gauge links...
-    *num_failures_h = 0;
-    unitarizeLinks(*U, num_failures_d);
-    qudaDeviceSynchronize();
-    if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
-
-    plaquette(*U);
-  }
-
-  plaq = plaquette(*U);
-  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-
-  host_free(num_failures_h);
-
-  // Gauge Fixing Routines
-  //---------------------------------------------------------------------------
-  switch (test_type) {
-  case 0:
-    printfQuda("%s gauge fixing with overrelaxation\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
-    gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
-                   gf_reunit_interval, gf_theta_condition);
-    comparePlaquette(plaq, plaquette(*U));
-    break;
-
-  case 1:
-    if (!checkDimsPartitioned()) {
-      printfQuda("%s gauge fixing with steepest descent method with FFTs\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
-      gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                     gf_theta_condition);
-      comparePlaquette(plaq, plaquette(*U));
-    } else {
-      errorQuda("FFT gauge fixing not supported for multi GPU geometry");
-    }
-    break;
-
-  default: errorQuda("Unknown test type %d", test_type);
-  }
-
-  double2 link_trace = getLinkTrace(*U);
-  printfQuda("Tr: %.16e:%.16e\n", link_trace.x / 3.0, link_trace.y / 3.0);
-
-  // Save if output string is specified
-  if (strcmp(gauge_outfile, "")) {
-
-    printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
-
-    QudaGaugeParam gauge_param = newQudaGaugeParam();
-    setWilsonGaugeParam(gauge_param);
-
-    void *cpu_gauge[4];
-    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
-
-    cudaGaugeField *gauge;
-    gauge = new cudaGaugeField(gParam);
-
-    // copy into regular field
-    copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
-    saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
-
-    // Write to disk
-    write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0);
-
-    for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]);
-    delete gauge;
-  } else {
-    printfQuda("No output file specified.\n");
-  }
-
-  delete U;
-
-  // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
-  PGaugeExchangeFree();
-
-  randstates->Release();
-  delete randstates;
-
-  freeGaugeQuda();
-  endQuda();
-  finalizeComms();
-
-  return 0;
-}

From d38486b2ea62b248778fec15fab5b1f02cf0cde7 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Sat, 1 May 2021 18:01:05 -0700
Subject: [PATCH 06/32] Clang tidy

---
 include/quda.h                |   3 +-
 include/quda_milc_interface.h |   8 +-
 tests/gauge_alg_ctest.cpp     | 180 ++++++++++++++++------------------
 3 files changed, 91 insertions(+), 100 deletions(-)

diff --git a/include/quda.h b/include/quda.h
index 3151ca667e..1157fe95f4 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -1500,7 +1500,8 @@ extern "C" {
    * @param[in] Nsteps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
+   * @param[in] tolerance, tolerance value to stop the method, if this value is zero then the method stops when
+   * the iteration count reaches the maximum number of steps defined by Nsteps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
    * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in] param The parameters of the external fields and the computation settings
diff --git a/include/quda_milc_interface.h b/include/quda_milc_interface.h
index cd5a95794c..d11364d51c 100644
--- a/include/quda_milc_interface.h
+++ b/include/quda_milc_interface.h
@@ -953,7 +953,6 @@ extern "C" {
    */
   void qudaDestroyGaugeField(void* gauge);
 
-
   /**
    * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
    * @param[in] precision, 1 for single precision else for double precision
@@ -961,7 +960,8 @@ extern "C" {
    * @param[in] Nsteps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
+   * @param[in] tolerance, tolerance value to stop the method, if this value is zero then the method stops when
+   * the iteration count reaches the maximum number of steps defined by Nsteps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
    * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in,out] milc_sitelink, MILC gauge field to be fixed
@@ -977,7 +977,6 @@ extern "C" {
     void* milc_sitelink
     );
 
-
   /**
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
    * @param[in] precision, 1 for single precision else for double precision
@@ -986,7 +985,8 @@ extern "C" {
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
    * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
+   * @param[in] tolerance, tolerance value to stop the method, if this value is zero then the method stops when
+   * the iteration count reaches the maximum number of steps defined by Nsteps
    * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in,out] milc_sitelink, MILC gauge field to be fixed
    */
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index 881d53bb76..f797420c9c 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -36,18 +36,12 @@ void display_test_info()
   printfQuda("running the following test:\n");
 
   switch (test_type) {
-  case 0:
-    printfQuda("\n Google testing\n");
-    break;
-  case 1:
-    printfQuda("\nOVR gauge fix\n");
-    break;
-  case 2:
-    printfQuda("\nFFT gauge fix\n");
-    break;
+  case 0: printfQuda("\n Google testing\n"); break;
+  case 1: printfQuda("\nOVR gauge fix\n"); break;
+  case 2: printfQuda("\nFFT gauge fix\n"); break;
   default: errorQuda("Undefined test type %d given", test_type);
   }
-  
+
   printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
   printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
              get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
@@ -72,9 +66,9 @@ class GaugeAlgTest : public ::testing::Test
   int novrsteps;
   bool coldstart;
   double beta_value;
-  
+
   bool unit_test;
-  
+
   RNG *randstates;
 
   void SetReunitarizationConsts()
@@ -115,10 +109,10 @@ class GaugeAlgTest : public ::testing::Test
 
   virtual void SetUp()
   {
-    if(execute) {
+    if (execute) {
       setVerbosity(QUDA_VERBOSE);
       param = newQudaGaugeParam();
-      
+
       // Setup gauge container.
       param.cpu_prec = prec;
       param.cpu_prec = prec;
@@ -126,21 +120,21 @@ class GaugeAlgTest : public ::testing::Test
       param.reconstruct = link_recon;
       param.cuda_prec_sloppy = prec;
       param.reconstruct_sloppy = link_recon;
-      
+
       param.type = QUDA_WILSON_LINKS;
       param.gauge_order = QUDA_MILC_GAUGE_ORDER;
-      
+
       param.X[0] = xdim;
       param.X[1] = ydim;
       param.X[2] = zdim;
       param.X[3] = tdim;
       setDims(param.X);
-      
+
       param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
       param.t_boundary = QUDA_PERIODIC_T;
       param.gauge_fix = QUDA_GAUGE_FIXED_NO;
       param.ga_pad = 0;
-      
+
       GaugeFieldParam gParam(0, param);
       gParam.pad = 0;
       gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
@@ -148,12 +142,12 @@ class GaugeAlgTest : public ::testing::Test
       gParam.link_type = param.type;
       gParam.reconstruct = param.reconstruct;
       gParam.setPrecision(gParam.Precision(), true);
-      
+
 #ifdef MULTI_GPU
       int y[4];
       int R[4] = {0, 0, 0, 0};
       for (int dir = 0; dir < 4; ++dir)
-	if (comm_dim_partitioned(dir)) R[dir] = 2;
+        if (comm_dim_partitioned(dir)) R[dir] = 2;
       for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
       int pad = 0;
       GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
@@ -170,93 +164,88 @@ class GaugeAlgTest : public ::testing::Test
       // CURAND random generator initialization
       randstates = new RNG(gParam, 1234);
       randstates->Init();
-      
+
       nsteps = heatbath_num_steps;
       nhbsteps = heatbath_num_heatbath_per_step;
       novrsteps = heatbath_num_overrelax_per_step;
       coldstart = heatbath_coldstart;
       beta_value = heatbath_beta_value;
-      
+
       a0.Start(__func__, __FILE__, __LINE__);
       a1.Start(__func__, __FILE__, __LINE__);
-      
+
       int *num_failures_h = (int *)mapped_malloc(sizeof(int));
       int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-      
+
       if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
-	InitGaugeField(*U);
+        InitGaugeField(*U);
       else
-	InitGaugeField(*U, *randstates);
-      
+        InitGaugeField(*U, *randstates);
+
       // Reunitarization setup
       SetReunitarizationConsts();
       plaquette(*U);
-      
+
       for (int step = 1; step <= nsteps; ++step) {
-	printfQuda("Step %d\n", step);
-	Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
-	
-	// Reunitarize gauge links...
-	*num_failures_h = 0;
-	unitarizeLinks(*U, num_failures_d);
-	qudaDeviceSynchronize();
-	if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
-	
-	plaquette(*U);
+        printfQuda("Step %d\n", step);
+        Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
+
+        // Reunitarize gauge links...
+        *num_failures_h = 0;
+        unitarizeLinks(*U, num_failures_d);
+        qudaDeviceSynchronize();
+        if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+
+        plaquette(*U);
       }
       a1.Stop(__func__, __FILE__, __LINE__);
-      
+
       printfQuda("Time Monte -> %.6f s\n", a1.Last());
       plaq = plaquette(*U);
       printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      
+
       host_free(num_failures_h);
 
       // If a specific test type is requested, perfrom it now and then
       // turn off all Google tests in the tear down.
       switch (test_type) {
       case 0:
-	// Do the Google testing
-	break;
-      case 1:
-	run_ovr();
-	break;
-      case 2:
-	run_fft();
-	break;    
-      default:
-	errorQuda("Invalid test type %d ", test_type);
+        // Do the Google testing
+        break;
+      case 1: run_ovr(); break;
+      case 2: run_fft(); break;
+      default: errorQuda("Invalid test type %d ", test_type);
       }
     }
   }
-  
+
   virtual void TearDown()
   {
-    if(execute) {
+    if (execute) {
       detu = getLinkDeterminant(*U);
       double2 tru = getLinkTrace(*U);
       printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
       printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
-      
+
       delete U;
       // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
       PGaugeExchangeFree();
-      
+
       a0.Stop(__func__, __FILE__, __LINE__);
       printfQuda("Time -> %.6f s\n", a0.Last());
       randstates->Release();
-      delete randstates;      
+      delete randstates;
     }
     // If we performed a specific instance, switch off the
     // Google testing.
-    if(test_type != 0) execute = false;
+    if (test_type != 0) execute = false;
   }
-  
+
   virtual void run_ovr()
   {
-    if(execute) {
-      gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-		     gf_theta_condition);
+    if (execute) {
+      gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
+                     gf_reunit_interval, gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
       ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -266,30 +255,31 @@ class GaugeAlgTest : public ::testing::Test
   }
   virtual void run_fft()
   {
-    if(execute) {
+    if (execute) {
       if (!checkDimsPartitioned()) {
-	printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-	gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-		       gf_theta_condition);
-	
-	auto plaq_gf = plaquette(*U);	
-	printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-	ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-	// Save if output string is specified
-	if (strcmp(gauge_outfile, "")) save_gauge();
+        printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
+        gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+                       gf_theta_condition);
+
+        auto plaq_gf = plaquette(*U);
+        printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+        ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+        // Save if output string is specified
+        if (strcmp(gauge_outfile, "")) save_gauge();
       } else {
-	errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
+        errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
       }
     }
   }
 
-  virtual void save_gauge() {
-    
+  virtual void save_gauge()
+  {
+
     printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
-    
+
     QudaGaugeParam gauge_param = newQudaGaugeParam();
     setWilsonGaugeParam(gauge_param);
-    
+
     void *cpu_gauge[4];
     for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
 
@@ -300,25 +290,25 @@ class GaugeAlgTest : public ::testing::Test
     gParam.link_type = param.type;
     gParam.reconstruct = param.reconstruct;
     gParam.setPrecision(gParam.Precision(), true);
-  
+
     cudaGaugeField *gauge;
     gauge = new cudaGaugeField(gParam);
-    
+
     // copy into regular field
     copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
     saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
-      
+
     // Write to disk
     write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0);
-    
+
     for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]);
     delete gauge;
   }
 };
-  
+
 TEST_F(GaugeAlgTest, Generation)
 {
-  if(execute) {
+  if (execute) {
     detu = getLinkDeterminant(*U);
     ASSERT_TRUE(CheckDeterminant(detu));
   }
@@ -326,10 +316,10 @@ TEST_F(GaugeAlgTest, Generation)
 
 TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
-  if(execute) {
+  if (execute) {
     printfQuda("Landau gauge fixing with overrelaxation\n");
     gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-		   gf_theta_condition);
+                   gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -338,10 +328,10 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 
 TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
-  if(execute) {
+  if (execute) {
     printfQuda("Coulomb gauge fixing with overrelaxation\n");
     gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-		   gf_theta_condition);
+                   gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -350,11 +340,11 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 
 TEST_F(GaugeAlgTest, Landau_FFT)
 {
-  if(execute) {
+  if (execute) {
     if (!checkDimsPartitioned()) {
       printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
       gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-		     gf_theta_condition);
+                     gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
       ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -364,11 +354,11 @@ TEST_F(GaugeAlgTest, Landau_FFT)
 
 TEST_F(GaugeAlgTest, Coulomb_FFT)
 {
-  if(execute) {
+  if (execute) {
     if (!checkDimsPartitioned()) {
       printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
       gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-		     gf_theta_condition);
+                     gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
       ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -382,7 +372,7 @@ int main(int argc, char **argv)
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
-  
+
   test_type = 0;
   CLI::TransformPairs<int> test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}};
   app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
@@ -395,12 +385,12 @@ int main(int argc, char **argv)
   if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
   if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
 
-  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)  
+  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
 
   // call srand() with a rank-dependent seed
   initRand();
-  
+
   display_test_info();
 
   // initialize the QUDA library
@@ -415,9 +405,9 @@ int main(int argc, char **argv)
 
   // return code for google test
   int test_rc = RUN_ALL_TESTS();
-  
-  endQuda();  
+
+  endQuda();
   finalizeComms();
-  
+
   return test_rc;
 }

From 426076bbd74af7d7a8e7004de7a9dcfeb3fdca78 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 30 Apr 2021 17:34:21 -0700
Subject: [PATCH 07/32] Add gf test interface to allow fine grained control
 over the GF testing

---
 tests/gauge_alg_test.cpp            |  3 ++-
 tests/heatbath_test.cpp             |  3 ++-
 tests/utils/command_line_params.cpp | 24 ++++++------------------
 3 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index b1d09ed7de..46a0f92dd7 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -380,7 +380,7 @@ int main(int argc, char **argv)
   // initalize google test, includes command line options
   ::testing::InitGoogleTest(&argc, argv);
 
-  // command line options
+  // command line options  
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
@@ -388,6 +388,7 @@ int main(int argc, char **argv)
   test_type = 0;
   CLI::TransformPairs<int> test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}};
   app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
+  
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp
index ff18f5d13f..37588df0ba 100644
--- a/tests/heatbath_test.cpp
+++ b/tests/heatbath_test.cpp
@@ -53,9 +53,10 @@ void display_test_info()
 }
 
 int main(int argc, char **argv)
-{
+{  
   // command line options
   auto app = make_app();
+  add_heatbath_option_group(app);
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp
index 581943fbaa..7b9b0abab6 100644
--- a/tests/utils/command_line_params.cpp
+++ b/tests/utils/command_line_params.cpp
@@ -223,6 +223,12 @@ quda::mgarray<QudaPrecision> mg_eig_save_prec = {};
 bool mg_eig_coarse_guess = false;
 bool mg_eig_preserve_deflation = false;
 
+int eofa_pm = 1;
+double eofa_shift = -1.2345;
+double eofa_mq1 = 1.0;
+double eofa_mq2 = 0.085;
+double eofa_mq3 = 1.0;
+
 double heatbath_beta_value = 6.2;
 int heatbath_warmup_steps = 10;
 int heatbath_num_steps = 10;
@@ -230,12 +236,6 @@ int heatbath_num_heatbath_per_step = 5;
 int heatbath_num_overrelax_per_step = 5;
 bool heatbath_coldstart = false;
 
-int eofa_pm = 1;
-double eofa_shift = -1.2345;
-double eofa_mq1 = 1.0;
-double eofa_mq2 = 0.085;
-double eofa_mq3 = 1.0;
-
 double stout_smear_rho = 0.1;
 double stout_smear_epsilon = -0.25;
 double ape_smear_rho = 0.6;
@@ -505,18 +505,6 @@ std::shared_ptr<QUDAApp> make_app(std::string app_description, std::string app_n
   quda_app->add_option("--gaussian-sigma", gaussian_sigma,
                        "Width of the Gaussian noise used for random gauge field contruction (default 0.2)");
 
-  quda_app->add_option("--heatbath-beta", heatbath_beta_value, "Beta value used in heatbath test (default 6.2)");
-  quda_app->add_option("--heatbath-coldstart", heatbath_coldstart,
-                       "Whether to use a cold or hot start in heatbath test (default false)");
-  quda_app->add_option("--heatbath-num-hb-per-step", heatbath_num_heatbath_per_step,
-                       "Number of heatbath hits per heatbath step (default 5)");
-  quda_app->add_option("--heatbath-num-or-per-step", heatbath_num_overrelax_per_step,
-                       "Number of overrelaxation hits per heatbath step (default 5)");
-  quda_app->add_option("--heatbath-num-steps", heatbath_num_steps,
-                       "Number of measurement steps in heatbath test (default 10)");
-  quda_app->add_option("--heatbath-warmup-steps", heatbath_warmup_steps,
-                       "Number of warmup steps in heatbath test (default 10)");
-
   quda_app->add_option("--inv-type", inv_type, "The type of solver to use (default cg)")
     ->transform(CLI::QUDACheckedTransformer(inverter_type_map));
   quda_app->add_option("--inv-deflate", inv_deflate, "Deflate the inverter using the eigensolver");

From fd30f59fc16efaf62ddfa531740086208078af69 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 30 Apr 2021 18:48:50 -0700
Subject: [PATCH 08/32] Move the gauge alg test to a ctest, make a new interface
 to the gauge fixing that allows for fine grained control and gauge IO

---
 lib/interface_quda.cpp    |  89 ++++++-------
 tests/CMakeLists.txt      |   4 +
 tests/gauge_alg_ctest.cpp | 274 ++++++++++++++++++++++++++++++++++++++
 tests/gauge_alg_test.cpp  |  57 +++-----
 tests/su3_test.cpp        |  21 +--
 5 files changed, 344 insertions(+), 101 deletions(-)
 create mode 100644 tests/gauge_alg_ctest.cpp

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index bb2b578713..d873a89e72 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -234,8 +234,8 @@ static TimeProfile profileMomAction("momActionQuda");
 static TimeProfile profileEnd("endQuda");
 
 //!< Profiler for GaugeFixing
-static TimeProfile GaugeFixFFTQuda("GaugeFixFFTQuda");
-static TimeProfile GaugeFixOVRQuda("GaugeFixOVRQuda");
+static TimeProfile profileGaugeFixFFT("gaugeFixFFTQuda");
+static TimeProfile profileGaugeFixOVR("gaugeFixOVRQuda");
 
 //!< Profiler for toal time spend between init and end
 static TimeProfile profileInit2End("initQuda-endQuda",false);
@@ -1537,6 +1537,8 @@ void endQuda(void)
     profileProject.Print();
     profilePhase.Print();
     profileMomAction.Print();
+    profileGaugeFixOVR.Print();
+    profileGaugeFixFFT.Print();
     profileEnd.Print();
 
     profileInit2End.Print();
@@ -5538,12 +5540,12 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
                               const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param,
                               double *timeinfo)
 {
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_TOTAL);
-
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL);
+  
   checkGaugeParam(param);
 
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_INIT);
-  GaugeFieldParam gParam(*param, gauge);
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT);
+  GaugeFieldParam gParam(gauge, *param);
   auto *cpuGauge = new cpuGaugeField(gParam);
 
   // gParam.pad = getFatLinkPadding(param->X);
@@ -5553,44 +5555,37 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
   gParam.setPrecision(gParam.Precision(), true);
   auto *cudaInGauge = new cudaGaugeField(gParam);
 
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_INIT);
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_H2D);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_INIT);
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_H2D);
 
-  ///if (!param->use_resident_gauge) {   // load fields onto the device
   cudaInGauge->loadCPUField(*cpuGauge);
- /* } else { // or use resident fields already present
-    if (!gaugePrecise) errorQuda("No resident gauge field allocated");
-    cudaInGauge = gaugePrecise;
-    gaugePrecise = nullptr;
-  } */
 
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_H2D);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_H2D);
 
   if (comm_size() == 1) {
     // perform the update
-    GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+    profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
     gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
                    stopWtheta);
-    GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+    profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE);
   } else {
-    cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, GaugeFixOVRQuda);
+    cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFixOVR);
 
-    // perform the update
-    GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+    // Perform the update
+    profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
     gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
                    stopWtheta);
-    GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+    profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE);
 
-    //HOW TO COPY BACK TO CPU: cudaInGaugeEx->cpuGauge
     copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
   }
-
-  // copy the gauge field back to the host
-  GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_D2H);
+  
+  // Copy the gauge field back to the host
+  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_D2H);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_D2H);
 
-  GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_TOTAL);
+  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_TOTAL);
 
   if (param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
@@ -5600,9 +5595,9 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
   }
 
   if(timeinfo){
-    timeinfo[0] = GaugeFixOVRQuda.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = GaugeFixOVRQuda.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = GaugeFixOVRQuda.Last(QUDA_PROFILE_D2H);
+    timeinfo[0] = profileGaugeFixOVR.Last(QUDA_PROFILE_H2D);
+    timeinfo[1] = profileGaugeFixOVR.Last(QUDA_PROFILE_COMPUTE);
+    timeinfo[2] = profileGaugeFixOVR.Last(QUDA_PROFILE_D2H);
   }
 
   return 0;
@@ -5612,11 +5607,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \
   const unsigned int  stopWtheta, QudaGaugeParam* param , double* timeinfo)
 {
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_TOTAL);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_TOTAL);
 
   checkGaugeParam(param);
 
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_INIT);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_INIT);
 
   GaugeFieldParam gParam(*param, gauge);
   auto *cpuGauge = new cpuGaugeField(gParam);
@@ -5629,33 +5624,27 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   auto *cudaInGauge = new cudaGaugeField(gParam);
 
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_INIT);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT);
 
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_H2D);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D);
 
-  //if (!param->use_resident_gauge) {   // load fields onto the device
   cudaInGauge->loadCPUField(*cpuGauge);
-  /*} else { // or use resident fields already present
-    if (!gaugePrecise) errorQuda("No resident gauge field allocated");
-    cudaInGauge = gaugePrecise;
-    gaugePrecise = nullptr;
-  } */
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_H2D);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_H2D);
 
   // perform the update
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_COMPUTE);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
 
   gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_COMPUTE);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE);
 
   // copy the gauge field back to the host
-  GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_D2H);
+  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_D2H);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_D2H);
 
-  GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_TOTAL);
+  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_TOTAL);
 
   if (param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
@@ -5663,11 +5652,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   } else {
     delete cudaInGauge;
   }
-
+  
   if (timeinfo) {
-    timeinfo[0] = GaugeFixFFTQuda.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = GaugeFixFFTQuda.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = GaugeFixFFTQuda.Last(QUDA_PROFILE_D2H);
+    timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D);
+    timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE);
+    timeinfo[2] = profileGaugeFixFFT.Last(QUDA_PROFILE_D2H);
   }
 
   return 0;
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index dcd7c8fa4b..6b9d21e1d5 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -226,6 +226,10 @@ if(QUDA_GAUGE_ALG)
   add_executable(gauge_alg_test gauge_alg_test.cpp)
   target_link_libraries(gauge_alg_test ${TEST_LIBS})
   quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS)
+
+  add_executable(gauge_alg_ctest gauge_alg_ctest.cpp)
+  target_link_libraries(gauge_alg_ctest ${TEST_LIBS})
+  quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS)
   install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR})
 
   add_executable(heatbath_test heatbath_test.cpp)
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
new file mode 100644
index 0000000000..410d2304bd
--- /dev/null
+++ b/tests/gauge_alg_ctest.cpp
@@ -0,0 +1,274 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <quda.h>
+#include <quda_internal.h>
+#include <gauge_field.h>
+
+#include <comm_quda.h>
+#include <host_utils.h>
+#include <command_line_params.h>
+#include <gauge_tools.h>
+
+#include <pgauge_monte.h>
+#include <random_quda.h>
+#include <unitarization_links.h>
+
+#include <qio_field.h>
+
+#include <gtest/gtest.h>
+
+using namespace quda;
+
+class GaugeAlgTest : public ::testing::Test
+{  
+ protected:
+
+  QudaGaugeParam param;
+  
+  Timer a0,a1;
+  double2 detu;
+  double3 plaq;
+  cudaGaugeField *U;
+  int nsteps;
+  int nhbsteps;
+  int novrsteps;
+  bool coldstart;
+  double beta_value;
+
+  RNG * randstates;
+
+  
+  void SetReunitarizationConsts(){
+    const double unitarize_eps = 1e-14;
+    const double max_error = 1e-10;
+    const int reunit_allow_svd = 1;
+    const int reunit_svd_only  = 0;
+    const double svd_rel_error = 1e-6;
+    const double svd_abs_error = 1e-6;
+    setUnitarizeLinksConstants(unitarize_eps, max_error,
+                               reunit_allow_svd, reunit_svd_only,
+                               svd_rel_error, svd_abs_error);
+
+  }
+
+  bool checkDimsPartitioned()
+  {
+    if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
+      return true;
+    return false;
+  }
+
+  bool comparePlaquette(double3 a, double3 b){
+    double a0,a1,a2;
+    a0 = std::abs(a.x - b.x);
+    a1 = std::abs(a.y - b.y);
+    a2 = std::abs(a.z - b.z);
+    double prec_val = 1.0e-5;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+    if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true;
+    return false;
+  }
+
+  bool CheckDeterminant(double2 detu){
+    double prec_val = 5e-8;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+    if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true;
+    return false;
+  }
+
+  virtual void SetUp() {
+    setVerbosity(QUDA_VERBOSE);
+
+    param = newQudaGaugeParam();
+
+    // Setup gauge container.
+    param.cpu_prec = prec;
+    param.cpu_prec = prec;
+    param.cuda_prec = prec;
+    param.reconstruct = link_recon;
+    param.cuda_prec_sloppy = prec;
+    param.reconstruct_sloppy = link_recon;
+
+    param.type = QUDA_WILSON_LINKS;
+    param.gauge_order = QUDA_MILC_GAUGE_ORDER;
+
+    param.X[0] = xdim;
+    param.X[1] = ydim;
+    param.X[2] = zdim;
+    param.X[3] = tdim;
+    setDims(param.X);
+
+    param.anisotropy = 1.0;  //don't support anisotropy for now!!!!!!
+    param.t_boundary = QUDA_PERIODIC_T;
+    param.gauge_fix = QUDA_GAUGE_FIXED_NO;
+    param.ga_pad = 0;
+
+    GaugeFieldParam gParam(0, param);
+    gParam.pad = 0;
+    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+    gParam.create      = QUDA_NULL_FIELD_CREATE;
+    gParam.link_type   = param.type;
+    gParam.reconstruct = param.reconstruct;
+    gParam.setPrecision(gParam.Precision(), true);
+
+#ifdef MULTI_GPU
+    int y[4];
+    int R[4] = {0,0,0,0};
+    for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
+    for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+    int pad = 0;
+    GaugeFieldParam gParamEx(y, prec, link_recon,
+                             pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+    gParamEx.create = QUDA_ZERO_FIELD_CREATE;
+    gParamEx.order = gParam.order;
+    gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
+    gParamEx.t_boundary = gParam.t_boundary;
+    gParamEx.nFace = 1;
+    for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
+    U = new cudaGaugeField(gParamEx);
+#else
+    U = new cudaGaugeField(gParam);
+#endif
+    // CURAND random generator initialization
+    randstates = new RNG(gParam, 1234);
+    randstates->Init();
+
+    nsteps = heatbath_num_steps;
+    nhbsteps = heatbath_num_heatbath_per_step;
+    novrsteps = heatbath_num_overrelax_per_step;
+    coldstart = heatbath_coldstart;
+    beta_value = heatbath_beta_value;
+
+    a0.Start(__func__, __FILE__, __LINE__);
+    a1.Start(__func__, __FILE__, __LINE__);
+
+    int *num_failures_h = (int *)mapped_malloc(sizeof(int));
+    int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
+
+    if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
+      InitGaugeField(*U);
+    else
+      InitGaugeField(*U, *randstates);
+
+    // Reunitarization setup
+    SetReunitarizationConsts();
+    plaquette(*U);
+
+    for(int step=1; step<=nsteps; ++step){
+      printfQuda("Step %d\n",step);
+      Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
+
+      //Reunitarize gauge links...
+      *num_failures_h = 0;
+      unitarizeLinks(*U, num_failures_d);
+      qudaDeviceSynchronize();
+      if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+
+      plaquette(*U);
+    }
+    a1.Stop(__func__, __FILE__, __LINE__);
+
+    printfQuda("Time Monte -> %.6f s\n", a1.Last());
+    plaq = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+
+    host_free(num_failures_h);
+  }
+
+  virtual void TearDown() {
+    detu = getLinkDeterminant(*U);
+    double2 tru = getLinkTrace(*U);
+    printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
+    printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0);
+
+    delete U;
+    //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+    PGaugeExchangeFree();
+
+    a0.Stop(__func__, __FILE__, __LINE__);
+    printfQuda("Time -> %.6f s\n", a0.Last());
+    randstates->Release();
+    delete randstates;
+  }
+};
+
+TEST_F(GaugeAlgTest, Generation)
+{
+  detu = getLinkDeterminant(*U);
+  ASSERT_TRUE(CheckDeterminant(detu));
+}
+
+TEST_F(GaugeAlgTest, Landau_Overrelaxation)
+{
+  printfQuda("Landau gauge fixing with overrelaxation\n");
+  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  auto plaq_gf = plaquette(*U);
+  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+}
+
+TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
+{
+  printfQuda("Coulomb gauge fixing with overrelaxation\n");
+  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  auto plaq_gf = plaquette(*U);
+  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+}
+
+TEST_F(GaugeAlgTest, Landau_FFT)
+{
+  if (!checkDimsPartitioned()) {
+    printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
+    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+    auto plaq_gf = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
+}
+
+TEST_F(GaugeAlgTest, Coulomb_FFT)
+{
+  if (!checkDimsPartitioned()) {
+    printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
+    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+    auto plaq_gf = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
+}
+
+int main(int argc, char **argv)
+{
+  // initalize google test, includes command line options
+  ::testing::InitGoogleTest(&argc, argv);
+  // return code for google test
+  int test_rc = 0;
+  xdim=ydim=zdim=tdim=32;
+
+  // command line options  
+  auto app = make_app();
+  add_gaugefix_option_group(app);
+  add_heatbath_option_group(app);
+  try {
+    app->parse(argc, argv);
+  } catch (const CLI::ParseError &e) {
+    return app->exit(e);
+  }
+
+  initComms(argc, argv, gridsize_from_cmdline);
+
+  // Ensure gtest prints only from rank 0
+  ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
+  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
+
+  initQuda(device_ordinal);
+  test_rc = RUN_ALL_TESTS();
+  endQuda();
+
+  finalizeComms();
+
+  return test_rc;
+}
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index 46a0f92dd7..d97f4c41bf 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -329,50 +329,32 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
   }
 }
 
-TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
+bool checkDimsPartitioned()
 {
-  if (execute) {
-    printfQuda("Coulomb gauge fixing with overrelaxation\n");
-    gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                   gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-    printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-    saveTuneCache();
-  }
+  if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
+    return true;
+  return false;
 }
 
-TEST_F(GaugeAlgTest, Landau_FFT)
+bool comparePlaquette(double3 a, double3 b)
 {
-  if (execute) {
-    if (!comm_partitioned()) {
-      printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                     gf_theta_condition);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-      saveTuneCache();
-    }
-  }
+  printfQuda("Plaq:    %.16e, %.16e, %.16e\n", a.x, a.y, a.z);
+  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);   
+  double a0,a1,a2;
+  a0 = std::abs(a.x - b.x);
+  a1 = std::abs(a.y - b.y);
+  a2 = std::abs(a.z - b.z);
+  double prec_val = 1.0e-5;
+  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+  return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
 }
 
-TEST_F(GaugeAlgTest, Coulomb_FFT)
+bool checkDeterminant(double2 detu)
 {
-  if (execute) {
-    if (!comm_partitioned()) {
-      printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                     gf_theta_condition);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-      saveTuneCache();
-    }
-  }
+  printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y);
+  double prec_val = 5e-8;
+  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+  return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val;
 }
 
 int main(int argc, char **argv)
@@ -443,4 +425,5 @@ int main(int argc, char **argv)
   finalizeComms();
 
   return test_rc;
+
 }
diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp
index 413d73541d..8ff7d5128f 100644
--- a/tests/su3_test.cpp
+++ b/tests/su3_test.cpp
@@ -85,26 +85,22 @@ int main(int argc, char **argv)
   setWilsonGaugeParam(gauge_param);
   gauge_param.t_boundary = QUDA_PERIODIC_T;
   setDims(gauge_param.X);
-
+  
+  // All user inputs now defined
+  display_test_info();
+  
+  // *** QUDA parameters begin here.  
   void *gauge[4], *new_gauge[4];
-
   for (int dir = 0; dir < 4; dir++) {
     gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size);
     new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size);
   }
-
-  initQuda(device_ordinal);
-
-  setVerbosity(verbosity);
-
-  // call srand() with a rank-dependent seed
-  initRand();
-
+  
   constructHostGaugeField(gauge, gauge_param, argc, argv);
   // Load the gauge field to the device
   loadGaugeQuda((void *)gauge, &gauge_param);
   saveGaugeQuda(new_gauge, &gauge_param);
-
+  
   double plaq[3];
   plaqQuda(plaq);
   printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1],
@@ -112,9 +108,6 @@ int main(int argc, char **argv)
 
 #ifdef GPU_GAUGE_TOOLS
 
-  // All user inputs now defined
-  display_test_info();
-
   // Topological charge and gauge energy
   double q_charge_check = 0.0;
   // Size of floating point data

From 01e2726404a8dff2a5d2d9c7827109423683e1b3 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 30 Apr 2021 18:48:50 -0700
Subject: [PATCH 09/32] Move the gauge_alg_test to a ctest, make a new
 interface to the gauge fixing that allows for fine grained control and gauge
 IO.

---
 tests/gauge_alg_test.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index d97f4c41bf..b88e4aa7b7 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -11,6 +11,7 @@
 #include <command_line_params.h>
 #include <misc.h>
 #include <timer.h>
+
 #include <gauge_tools.h>
 #include <tune_quda.h>
 
@@ -422,8 +423,7 @@ int main(int argc, char **argv)
 
   endQuda();
 
-  finalizeComms();
-
-  return test_rc;
-
+  finalizeComms()
+  
+  return test_rc;  
 }

From a4fee79136b7bb8201480c6dcea77ffca33b9e1c Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 30 Apr 2021 18:54:13 -0700
Subject: [PATCH 10/32] clang tidy

---
 lib/interface_quda.cpp    |  7 ++--
 tests/gauge_alg_ctest.cpp | 80 ++++++++++++++++++++-------------------
 tests/gauge_alg_test.cpp  |  4 +-
 tests/heatbath_test.cpp   |  2 +-
 tests/su3_test.cpp        | 13 ++++---
 5 files changed, 56 insertions(+), 50 deletions(-)

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index d873a89e72..7f75793180 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5541,7 +5541,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
                               double *timeinfo)
 {
   profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL);
-  
+
   checkGaugeParam(param);
 
   profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT);
@@ -5579,7 +5579,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
 
     copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
   }
-  
+
   // Copy the gauge field back to the host
   profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
@@ -5623,7 +5623,6 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   gParam.setPrecision(gParam.Precision(), true);
   auto *cudaInGauge = new cudaGaugeField(gParam);
 
-
   profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT);
 
   profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D);
@@ -5652,7 +5651,7 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   } else {
     delete cudaInGauge;
   }
-  
+
   if (timeinfo) {
     timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D);
     timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE);
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index 410d2304bd..cdaa2efd9a 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -22,12 +22,11 @@
 using namespace quda;
 
 class GaugeAlgTest : public ::testing::Test
-{  
- protected:
-
+{
+protected:
   QudaGaugeParam param;
-  
-  Timer a0,a1;
+
+  Timer a0, a1;
   double2 detu;
   double3 plaq;
   cudaGaugeField *U;
@@ -37,20 +36,17 @@ class GaugeAlgTest : public ::testing::Test
   bool coldstart;
   double beta_value;
 
-  RNG * randstates;
+  RNG *randstates;
 
-  
-  void SetReunitarizationConsts(){
+  void SetReunitarizationConsts()
+  {
     const double unitarize_eps = 1e-14;
     const double max_error = 1e-10;
     const int reunit_allow_svd = 1;
-    const int reunit_svd_only  = 0;
+    const int reunit_svd_only = 0;
     const double svd_rel_error = 1e-6;
     const double svd_abs_error = 1e-6;
-    setUnitarizeLinksConstants(unitarize_eps, max_error,
-                               reunit_allow_svd, reunit_svd_only,
-                               svd_rel_error, svd_abs_error);
-
+    setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error);
   }
 
   bool checkDimsPartitioned()
@@ -60,8 +56,9 @@ class GaugeAlgTest : public ::testing::Test
     return false;
   }
 
-  bool comparePlaquette(double3 a, double3 b){
-    double a0,a1,a2;
+  bool comparePlaquette(double3 a, double3 b)
+  {
+    double a0, a1, a2;
     a0 = std::abs(a.x - b.x);
     a1 = std::abs(a.y - b.y);
     a2 = std::abs(a.z - b.z);
@@ -71,14 +68,16 @@ class GaugeAlgTest : public ::testing::Test
     return false;
   }
 
-  bool CheckDeterminant(double2 detu){
+  bool CheckDeterminant(double2 detu)
+  {
     double prec_val = 5e-8;
     if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
     if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true;
     return false;
   }
 
-  virtual void SetUp() {
+  virtual void SetUp()
+  {
     setVerbosity(QUDA_VERBOSE);
 
     param = newQudaGaugeParam();
@@ -100,7 +99,7 @@ class GaugeAlgTest : public ::testing::Test
     param.X[3] = tdim;
     setDims(param.X);
 
-    param.anisotropy = 1.0;  //don't support anisotropy for now!!!!!!
+    param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
     param.t_boundary = QUDA_PERIODIC_T;
     param.gauge_fix = QUDA_GAUGE_FIXED_NO;
     param.ga_pad = 0;
@@ -108,25 +107,25 @@ class GaugeAlgTest : public ::testing::Test
     GaugeFieldParam gParam(0, param);
     gParam.pad = 0;
     gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-    gParam.create      = QUDA_NULL_FIELD_CREATE;
-    gParam.link_type   = param.type;
+    gParam.create = QUDA_NULL_FIELD_CREATE;
+    gParam.link_type = param.type;
     gParam.reconstruct = param.reconstruct;
     gParam.setPrecision(gParam.Precision(), true);
 
 #ifdef MULTI_GPU
     int y[4];
-    int R[4] = {0,0,0,0};
-    for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2;
-    for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+    int R[4] = {0, 0, 0, 0};
+    for (int dir = 0; dir < 4; ++dir)
+      if (comm_dim_partitioned(dir)) R[dir] = 2;
+    for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
     int pad = 0;
-    GaugeFieldParam gParamEx(y, prec, link_recon,
-                             pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+    GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
     gParamEx.create = QUDA_ZERO_FIELD_CREATE;
     gParamEx.order = gParam.order;
     gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
     gParamEx.t_boundary = gParam.t_boundary;
     gParamEx.nFace = 1;
-    for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
+    for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir];
     U = new cudaGaugeField(gParamEx);
 #else
     U = new cudaGaugeField(gParam);
@@ -156,11 +155,11 @@ class GaugeAlgTest : public ::testing::Test
     SetReunitarizationConsts();
     plaquette(*U);
 
-    for(int step=1; step<=nsteps; ++step){
-      printfQuda("Step %d\n",step);
+    for (int step = 1; step <= nsteps; ++step) {
+      printfQuda("Step %d\n", step);
       Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
 
-      //Reunitarize gauge links...
+      // Reunitarize gauge links...
       *num_failures_h = 0;
       unitarizeLinks(*U, num_failures_d);
       qudaDeviceSynchronize();
@@ -177,14 +176,15 @@ class GaugeAlgTest : public ::testing::Test
     host_free(num_failures_h);
   }
 
-  virtual void TearDown() {
+  virtual void TearDown()
+  {
     detu = getLinkDeterminant(*U);
     double2 tru = getLinkTrace(*U);
     printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
-    printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0);
+    printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
 
     delete U;
-    //Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+    // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
     PGaugeExchangeFree();
 
     a0.Stop(__func__, __FILE__, __LINE__);
@@ -203,7 +203,8 @@ TEST_F(GaugeAlgTest, Generation)
 TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
   printfQuda("Landau gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+                 gf_theta_condition);
   auto plaq_gf = plaquette(*U);
   printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
   ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -212,7 +213,8 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
   printfQuda("Coulomb gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition);
+  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+                 gf_theta_condition);
   auto plaq_gf = plaquette(*U);
   printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
   ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -222,7 +224,8 @@ TEST_F(GaugeAlgTest, Landau_FFT)
 {
   if (!checkDimsPartitioned()) {
     printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+                   gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -233,7 +236,8 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
 {
   if (!checkDimsPartitioned()) {
     printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition);
+    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+                   gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -246,9 +250,9 @@ int main(int argc, char **argv)
   ::testing::InitGoogleTest(&argc, argv);
   // return code for google test
   int test_rc = 0;
-  xdim=ydim=zdim=tdim=32;
+  xdim = ydim = zdim = tdim = 32;
 
-  // command line options  
+  // command line options
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index b88e4aa7b7..5728dd1141 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -340,8 +340,8 @@ bool checkDimsPartitioned()
 bool comparePlaquette(double3 a, double3 b)
 {
   printfQuda("Plaq:    %.16e, %.16e, %.16e\n", a.x, a.y, a.z);
-  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);   
-  double a0,a1,a2;
+  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);
+  double a0, a1, a2;
   a0 = std::abs(a.x - b.x);
   a1 = std::abs(a.y - b.y);
   a2 = std::abs(a.z - b.z);
diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp
index 37588df0ba..e3aa2cb349 100644
--- a/tests/heatbath_test.cpp
+++ b/tests/heatbath_test.cpp
@@ -53,7 +53,7 @@ void display_test_info()
 }
 
 int main(int argc, char **argv)
-{  
+{
   // command line options
   auto app = make_app();
   add_heatbath_option_group(app);
diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp
index 8ff7d5128f..d2a540ae4d 100644
--- a/tests/su3_test.cpp
+++ b/tests/su3_test.cpp
@@ -82,25 +82,28 @@ int main(int argc, char **argv)
   if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
   if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
 
+  initQuda(device_ordinal);
+  setVerbosity(verbosity);
+
   setWilsonGaugeParam(gauge_param);
   gauge_param.t_boundary = QUDA_PERIODIC_T;
   setDims(gauge_param.X);
-  
+
   // All user inputs now defined
   display_test_info();
-  
-  // *** QUDA parameters begin here.  
+
+  // *** QUDA parameters begin here.
   void *gauge[4], *new_gauge[4];
   for (int dir = 0; dir < 4; dir++) {
     gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size);
     new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size);
   }
-  
+
   constructHostGaugeField(gauge, gauge_param, argc, argv);
   // Load the gauge field to the device
   loadGaugeQuda((void *)gauge, &gauge_param);
   saveGaugeQuda(new_gauge, &gauge_param);
-  
+
   double plaq[3];
   plaqQuda(plaq);
   printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1],

From 15b7e4c30cfd58d5a629e9c664c770c5f1e89cc0 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Sat, 1 May 2021 17:59:56 -0700
Subject: [PATCH 11/32] Allow for single case testing in gauge_alg_ctest, minor
 clean up of gauge fixing stdout and comments

---
 lib/gauge_fix_fft.cu      |   2 +-
 tests/CMakeLists.txt      |  10 +-
 tests/gauge_alg_ctest.cpp | 443 +++++++++++++++++++++++++-------------
 3 files changed, 298 insertions(+), 157 deletions(-)

diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu
index ac991ea63c..23f924d28b 100644
--- a/lib/gauge_fix_fft.cu
+++ b/lib/gauge_fix_fft.cu
@@ -193,7 +193,7 @@ namespace quda {
     if (getVerbosity() >= QUDA_SUMMARIZE) {
       printfQuda("\tAuto tune active: %s\n", autotune ? "true" : "false");      
       printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha0);
-      printfQuda("\tTolerance: %lf\n", tolerance);
+      printfQuda("\tTolerance: %e\n", tolerance);
       printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta");
       printfQuda("\tMaximum number of iterations: %d\n", Nsteps);
       printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 6b9d21e1d5..764cc6af97 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -223,14 +223,10 @@ if(QUDA_FORCE_GAUGE)
 endif()
 
 if(QUDA_GAUGE_ALG)
-  add_executable(gauge_alg_test gauge_alg_test.cpp)
-  target_link_libraries(gauge_alg_test ${TEST_LIBS})
-  quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS)
-
   add_executable(gauge_alg_ctest gauge_alg_ctest.cpp)
   target_link_libraries(gauge_alg_ctest ${TEST_LIBS})
   quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS)
-  install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR})
+  install(TARGETS gauge_alg_ctest ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR})
 
   add_executable(heatbath_test heatbath_test.cpp)
   target_link_libraries(heatbath_test ${TEST_LIBS})
@@ -816,8 +812,8 @@ foreach(prec IN LISTS TEST_PRECS)
 
   if(QUDA_GAUGE_ALG)
     add_test(NAME gauge_alg_${prec}
-             COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg_test> ${MPIEXEC_POSTFLAGS}
-                     --dim 4 6 8 10 --prec ${prec}
+             COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg_ctest> ${MPIEXEC_POSTFLAGS}
+                     --dim 2 4 6 8 --prec ${prec}
                      --gtest_output=xml:gauge_arg_test_${prec}.xml)
   endif()
 
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index cdaa2efd9a..881d53bb76 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -9,6 +9,7 @@
 #include <comm_quda.h>
 #include <host_utils.h>
 #include <command_line_params.h>
+#include <misc.h>
 #include <gauge_tools.h>
 
 #include <pgauge_monte.h>
@@ -21,6 +22,42 @@
 
 using namespace quda;
 
+//***********************************************************//
+// This boolean controls whether or not the full Google test //
+// is done. If the user passes a value of 1 or 2 to --test   //
+// then a single instance of OVR or FFT gauge fixing is done //
+// and the value of this bool is set to false. Otherwise the //
+// Google tests are performed.                               //
+//***********************************************************//
+bool execute = true;
+
+void display_test_info()
+{
+  printfQuda("running the following test:\n");
+
+  switch (test_type) {
+  case 0:
+    printfQuda("\n Google testing\n");
+    break;
+  case 1:
+    printfQuda("\nOVR gauge fix\n");
+    break;
+  case 2:
+    printfQuda("\nFFT gauge fix\n");
+    break;
+  default: errorQuda("Undefined test type %d given", test_type);
+  }
+  
+  printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
+  printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
+             get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
+             tdim, Lsdim);
+
+  printfQuda("Grid partition info:     X  Y  Z  T\n");
+  printfQuda("                         %d  %d  %d  %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2),
+             dimPartitioned(3));
+}
+
 class GaugeAlgTest : public ::testing::Test
 {
 protected:
@@ -35,7 +72,9 @@ class GaugeAlgTest : public ::testing::Test
   int novrsteps;
   bool coldstart;
   double beta_value;
-
+  
+  bool unit_test;
+  
   RNG *randstates;
 
   void SetReunitarizationConsts()
@@ -64,45 +103,195 @@ class GaugeAlgTest : public ::testing::Test
     a2 = std::abs(a.z - b.z);
     double prec_val = 1.0e-5;
     if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-    if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true;
-    return false;
+    return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
   }
 
   bool CheckDeterminant(double2 detu)
   {
     double prec_val = 5e-8;
     if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-    if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true;
-    return false;
+    return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val);
   }
 
   virtual void SetUp()
   {
-    setVerbosity(QUDA_VERBOSE);
-
-    param = newQudaGaugeParam();
-
-    // Setup gauge container.
-    param.cpu_prec = prec;
-    param.cpu_prec = prec;
-    param.cuda_prec = prec;
-    param.reconstruct = link_recon;
-    param.cuda_prec_sloppy = prec;
-    param.reconstruct_sloppy = link_recon;
-
-    param.type = QUDA_WILSON_LINKS;
-    param.gauge_order = QUDA_MILC_GAUGE_ORDER;
-
-    param.X[0] = xdim;
-    param.X[1] = ydim;
-    param.X[2] = zdim;
-    param.X[3] = tdim;
-    setDims(param.X);
+    if(execute) {
+      setVerbosity(QUDA_VERBOSE);
+      param = newQudaGaugeParam();
+      
+      // Setup gauge container.
+      param.cpu_prec = prec;
+      param.cpu_prec = prec;
+      param.cuda_prec = prec;
+      param.reconstruct = link_recon;
+      param.cuda_prec_sloppy = prec;
+      param.reconstruct_sloppy = link_recon;
+      
+      param.type = QUDA_WILSON_LINKS;
+      param.gauge_order = QUDA_MILC_GAUGE_ORDER;
+      
+      param.X[0] = xdim;
+      param.X[1] = ydim;
+      param.X[2] = zdim;
+      param.X[3] = tdim;
+      setDims(param.X);
+      
+      param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
+      param.t_boundary = QUDA_PERIODIC_T;
+      param.gauge_fix = QUDA_GAUGE_FIXED_NO;
+      param.ga_pad = 0;
+      
+      GaugeFieldParam gParam(0, param);
+      gParam.pad = 0;
+      gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+      gParam.create = QUDA_NULL_FIELD_CREATE;
+      gParam.link_type = param.type;
+      gParam.reconstruct = param.reconstruct;
+      gParam.setPrecision(gParam.Precision(), true);
+      
+#ifdef MULTI_GPU
+      int y[4];
+      int R[4] = {0, 0, 0, 0};
+      for (int dir = 0; dir < 4; ++dir)
+	if (comm_dim_partitioned(dir)) R[dir] = 2;
+      for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
+      int pad = 0;
+      GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
+      gParamEx.create = QUDA_ZERO_FIELD_CREATE;
+      gParamEx.order = gParam.order;
+      gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
+      gParamEx.t_boundary = gParam.t_boundary;
+      gParamEx.nFace = 1;
+      for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir];
+      U = new cudaGaugeField(gParamEx);
+#else
+      U = new cudaGaugeField(gParam);
+#endif
+      // CURAND random generator initialization
+      randstates = new RNG(gParam, 1234);
+      randstates->Init();
+      
+      nsteps = heatbath_num_steps;
+      nhbsteps = heatbath_num_heatbath_per_step;
+      novrsteps = heatbath_num_overrelax_per_step;
+      coldstart = heatbath_coldstart;
+      beta_value = heatbath_beta_value;
+      
+      a0.Start(__func__, __FILE__, __LINE__);
+      a1.Start(__func__, __FILE__, __LINE__);
+      
+      int *num_failures_h = (int *)mapped_malloc(sizeof(int));
+      int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
+      
+      if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
+	InitGaugeField(*U);
+      else
+	InitGaugeField(*U, *randstates);
+      
+      // Reunitarization setup
+      SetReunitarizationConsts();
+      plaquette(*U);
+      
+      for (int step = 1; step <= nsteps; ++step) {
+	printfQuda("Step %d\n", step);
+	Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
+	
+	// Reunitarize gauge links...
+	*num_failures_h = 0;
+	unitarizeLinks(*U, num_failures_d);
+	qudaDeviceSynchronize();
+	if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+	
+	plaquette(*U);
+      }
+      a1.Stop(__func__, __FILE__, __LINE__);
+      
+      printfQuda("Time Monte -> %.6f s\n", a1.Last());
+      plaq = plaquette(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+      
+      host_free(num_failures_h);
+
+      // If a specific test type is requested, perfrom it now and then
+      // turn off all Google tests in the tear down.
+      switch (test_type) {
+      case 0:
+	// Do the Google testing
+	break;
+      case 1:
+	run_ovr();
+	break;
+      case 2:
+	run_fft();
+	break;    
+      default:
+	errorQuda("Invalid test type %d ", test_type);
+      }
+    }
+  }
+  
+  virtual void TearDown()
+  {
+    if(execute) {
+      detu = getLinkDeterminant(*U);
+      double2 tru = getLinkTrace(*U);
+      printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
+      printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
+      
+      delete U;
+      // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
+      PGaugeExchangeFree();
+      
+      a0.Stop(__func__, __FILE__, __LINE__);
+      printfQuda("Time -> %.6f s\n", a0.Last());
+      randstates->Release();
+      delete randstates;      
+    }
+    // If we performed a specific instance, switch off the
+    // Google testing.
+    if(test_type != 0) execute = false;
+  }
+  
+  virtual void run_ovr()
+  {
+    if(execute) {
+      gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+		     gf_theta_condition);
+      auto plaq_gf = plaquette(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+      // Save if output string is specified
+      if (strcmp(gauge_outfile, "")) save_gauge();
+    }
+  }
+  virtual void run_fft()
+  {
+    if(execute) {
+      if (!checkDimsPartitioned()) {
+	printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
+	gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+		       gf_theta_condition);
+	
+	auto plaq_gf = plaquette(*U);	
+	printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+	ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+	// Save if output string is specified
+	if (strcmp(gauge_outfile, "")) save_gauge();
+      } else {
+	errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
+      }
+    }
+  }
 
-    param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
-    param.t_boundary = QUDA_PERIODIC_T;
-    param.gauge_fix = QUDA_GAUGE_FIXED_NO;
-    param.ga_pad = 0;
+  virtual void save_gauge() {
+    
+    printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
+    
+    QudaGaugeParam gauge_param = newQudaGaugeParam();
+    setWilsonGaugeParam(gauge_param);
+    
+    void *cpu_gauge[4];
+    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
 
     GaugeFieldParam gParam(0, param);
     gParam.pad = 0;
@@ -111,168 +300,124 @@ class GaugeAlgTest : public ::testing::Test
     gParam.link_type = param.type;
     gParam.reconstruct = param.reconstruct;
     gParam.setPrecision(gParam.Precision(), true);
-
-#ifdef MULTI_GPU
-    int y[4];
-    int R[4] = {0, 0, 0, 0};
-    for (int dir = 0; dir < 4; ++dir)
-      if (comm_dim_partitioned(dir)) R[dir] = 2;
-    for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
-    int pad = 0;
-    GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
-    gParamEx.create = QUDA_ZERO_FIELD_CREATE;
-    gParamEx.order = gParam.order;
-    gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
-    gParamEx.t_boundary = gParam.t_boundary;
-    gParamEx.nFace = 1;
-    for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir];
-    U = new cudaGaugeField(gParamEx);
-#else
-    U = new cudaGaugeField(gParam);
-#endif
-    // CURAND random generator initialization
-    randstates = new RNG(gParam, 1234);
-    randstates->Init();
-
-    nsteps = heatbath_num_steps;
-    nhbsteps = heatbath_num_heatbath_per_step;
-    novrsteps = heatbath_num_overrelax_per_step;
-    coldstart = heatbath_coldstart;
-    beta_value = heatbath_beta_value;
-
-    a0.Start(__func__, __FILE__, __LINE__);
-    a1.Start(__func__, __FILE__, __LINE__);
-
-    int *num_failures_h = (int *)mapped_malloc(sizeof(int));
-    int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-
-    if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
-      InitGaugeField(*U);
-    else
-      InitGaugeField(*U, *randstates);
-
-    // Reunitarization setup
-    SetReunitarizationConsts();
-    plaquette(*U);
-
-    for (int step = 1; step <= nsteps; ++step) {
-      printfQuda("Step %d\n", step);
-      Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
-
-      // Reunitarize gauge links...
-      *num_failures_h = 0;
-      unitarizeLinks(*U, num_failures_d);
-      qudaDeviceSynchronize();
-      if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
-
-      plaquette(*U);
-    }
-    a1.Stop(__func__, __FILE__, __LINE__);
-
-    printfQuda("Time Monte -> %.6f s\n", a1.Last());
-    plaq = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-
-    host_free(num_failures_h);
-  }
-
-  virtual void TearDown()
-  {
-    detu = getLinkDeterminant(*U);
-    double2 tru = getLinkTrace(*U);
-    printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
-    printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
-
-    delete U;
-    // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
-    PGaugeExchangeFree();
-
-    a0.Stop(__func__, __FILE__, __LINE__);
-    printfQuda("Time -> %.6f s\n", a0.Last());
-    randstates->Release();
-    delete randstates;
+  
+    cudaGaugeField *gauge;
+    gauge = new cudaGaugeField(gParam);
+    
+    // copy into regular field
+    copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
+    saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
+      
+    // Write to disk
+    write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0);
+    
+    for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]);
+    delete gauge;
   }
 };
-
+  
 TEST_F(GaugeAlgTest, Generation)
 {
-  detu = getLinkDeterminant(*U);
-  ASSERT_TRUE(CheckDeterminant(detu));
+  if(execute) {
+    detu = getLinkDeterminant(*U);
+    ASSERT_TRUE(CheckDeterminant(detu));
+  }
 }
 
 TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
-  printfQuda("Landau gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                 gf_theta_condition);
-  auto plaq_gf = plaquette(*U);
-  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if(execute) {
+    printfQuda("Landau gauge fixing with overrelaxation\n");
+    gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+		   gf_theta_condition);
+    auto plaq_gf = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
 }
 
 TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
-  printfQuda("Coulomb gauge fixing with overrelaxation\n");
-  gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                 gf_theta_condition);
-  auto plaq_gf = plaquette(*U);
-  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-  ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if(execute) {
+    printfQuda("Coulomb gauge fixing with overrelaxation\n");
+    gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+		   gf_theta_condition);
+    auto plaq_gf = plaquette(*U);
+    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  }
 }
 
 TEST_F(GaugeAlgTest, Landau_FFT)
 {
-  if (!checkDimsPartitioned()) {
-    printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                   gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if(execute) {
+    if (!checkDimsPartitioned()) {
+      printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
+      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+		     gf_theta_condition);
+      auto plaq_gf = plaquette(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+    }
   }
 }
 
 TEST_F(GaugeAlgTest, Coulomb_FFT)
 {
-  if (!checkDimsPartitioned()) {
-    printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-    gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                   gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+  if(execute) {
+    if (!checkDimsPartitioned()) {
+      printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
+      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+		     gf_theta_condition);
+      auto plaq_gf = plaquette(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+    }
   }
 }
 
 int main(int argc, char **argv)
 {
-  // initalize google test, includes command line options
-  ::testing::InitGoogleTest(&argc, argv);
-  // return code for google test
-  int test_rc = 0;
-  xdim = ydim = zdim = tdim = 32;
-
   // command line options
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
+  
+  test_type = 0;
+  CLI::TransformPairs<int> test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}};
+  app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
     return app->exit(e);
   }
 
+  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
+  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
+
+  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)  
   initComms(argc, argv, gridsize_from_cmdline);
 
+  // call srand() with a rank-dependent seed
+  initRand();
+  
+  display_test_info();
+
+  // initialize the QUDA library
+  initQuda(device_ordinal);
+
+  // initalize google test, includes command line options
+  ::testing::InitGoogleTest(&argc, argv);
+
   // Ensure gtest prints only from rank 0
   ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
   if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
 
-  initQuda(device_ordinal);
-  test_rc = RUN_ALL_TESTS();
-  endQuda();
-
+  // return code for google test
+  int test_rc = RUN_ALL_TESTS();
+  
+  endQuda();  
   finalizeComms();
-
+  
   return test_rc;
 }

From fe83fa846abd57505fa40461c0938ee48a91817a Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Sat, 1 May 2021 18:01:05 -0700
Subject: [PATCH 12/32] Clang tidy

---
 include/quda.h                |   3 +-
 include/quda_milc_interface.h |   8 +-
 tests/gauge_alg_ctest.cpp     | 180 ++++++++++++++++------------------
 3 files changed, 91 insertions(+), 100 deletions(-)

diff --git a/include/quda.h b/include/quda.h
index 95952a648c..63d09b8992 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -1510,7 +1510,8 @@ extern "C" {
    * @param[in] Nsteps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
+   * @param[in] tolerance, tolerance value to stop the method, if this value is zero then the method stops when
+   * iteration reaches the maximum number of steps defined by Nsteps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
    * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in] param The parameters of the external fields and the computation settings
diff --git a/include/quda_milc_interface.h b/include/quda_milc_interface.h
index 2bc8b5900e..1f45d1bae7 100644
--- a/include/quda_milc_interface.h
+++ b/include/quda_milc_interface.h
@@ -947,7 +947,6 @@ extern "C" {
    */
   void qudaDestroyGaugeField(void* gauge);
 
-
   /**
    * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
    * @param[in] precision, 1 for single precision else for double precision
@@ -955,7 +954,8 @@ extern "C" {
    * @param[in] Nsteps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
+   * @param[in] tolerance, tolerance value to stop the method, if this value is zero then the method stops when
+   * iteration reaches the maximum number of steps defined by Nsteps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
    * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in,out] milc_sitelink, MILC gauge field to be fixed
@@ -971,7 +971,6 @@ extern "C" {
     void* milc_sitelink
     );
 
-
   /**
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
    * @param[in] precision, 1 for single precision else for double precision
@@ -980,7 +979,8 @@ extern "C" {
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
    * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
+   * @param[in] tolerance, tolerance value to stop the method, if this value is zero then the method stops when
+   * iteration reaches the maximum number of steps defined by Nsteps
    * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    * @param[in,out] milc_sitelink, MILC gauge field to be fixed
    */
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index 881d53bb76..f797420c9c 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -36,18 +36,12 @@ void display_test_info()
   printfQuda("running the following test:\n");
 
   switch (test_type) {
-  case 0:
-    printfQuda("\n Google testing\n");
-    break;
-  case 1:
-    printfQuda("\nOVR gauge fix\n");
-    break;
-  case 2:
-    printfQuda("\nFFT gauge fix\n");
-    break;
+  case 0: printfQuda("\n Google testing\n"); break;
+  case 1: printfQuda("\nOVR gauge fix\n"); break;
+  case 2: printfQuda("\nFFT gauge fix\n"); break;
   default: errorQuda("Undefined test type %d given", test_type);
   }
-  
+
   printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
   printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
              get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
@@ -72,9 +66,9 @@ class GaugeAlgTest : public ::testing::Test
   int novrsteps;
   bool coldstart;
   double beta_value;
-  
+
   bool unit_test;
-  
+
   RNG *randstates;
 
   void SetReunitarizationConsts()
@@ -115,10 +109,10 @@ class GaugeAlgTest : public ::testing::Test
 
   virtual void SetUp()
   {
-    if(execute) {
+    if (execute) {
       setVerbosity(QUDA_VERBOSE);
       param = newQudaGaugeParam();
-      
+
       // Setup gauge container.
       param.cpu_prec = prec;
       param.cpu_prec = prec;
@@ -126,21 +120,21 @@ class GaugeAlgTest : public ::testing::Test
       param.reconstruct = link_recon;
       param.cuda_prec_sloppy = prec;
       param.reconstruct_sloppy = link_recon;
-      
+
       param.type = QUDA_WILSON_LINKS;
       param.gauge_order = QUDA_MILC_GAUGE_ORDER;
-      
+
       param.X[0] = xdim;
       param.X[1] = ydim;
       param.X[2] = zdim;
       param.X[3] = tdim;
       setDims(param.X);
-      
+
       param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
       param.t_boundary = QUDA_PERIODIC_T;
       param.gauge_fix = QUDA_GAUGE_FIXED_NO;
       param.ga_pad = 0;
-      
+
       GaugeFieldParam gParam(0, param);
       gParam.pad = 0;
       gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
@@ -148,12 +142,12 @@ class GaugeAlgTest : public ::testing::Test
       gParam.link_type = param.type;
       gParam.reconstruct = param.reconstruct;
       gParam.setPrecision(gParam.Precision(), true);
-      
+
 #ifdef MULTI_GPU
       int y[4];
       int R[4] = {0, 0, 0, 0};
       for (int dir = 0; dir < 4; ++dir)
-	if (comm_dim_partitioned(dir)) R[dir] = 2;
+        if (comm_dim_partitioned(dir)) R[dir] = 2;
       for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
       int pad = 0;
       GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
@@ -170,93 +164,88 @@ class GaugeAlgTest : public ::testing::Test
       // CURAND random generator initialization
       randstates = new RNG(gParam, 1234);
       randstates->Init();
-      
+
       nsteps = heatbath_num_steps;
       nhbsteps = heatbath_num_heatbath_per_step;
       novrsteps = heatbath_num_overrelax_per_step;
       coldstart = heatbath_coldstart;
       beta_value = heatbath_beta_value;
-      
+
       a0.Start(__func__, __FILE__, __LINE__);
       a1.Start(__func__, __FILE__, __LINE__);
-      
+
       int *num_failures_h = (int *)mapped_malloc(sizeof(int));
       int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-      
+
       if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
-	InitGaugeField(*U);
+        InitGaugeField(*U);
       else
-	InitGaugeField(*U, *randstates);
-      
+        InitGaugeField(*U, *randstates);
+
       // Reunitarization setup
       SetReunitarizationConsts();
       plaquette(*U);
-      
+
       for (int step = 1; step <= nsteps; ++step) {
-	printfQuda("Step %d\n", step);
-	Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
-	
-	// Reunitarize gauge links...
-	*num_failures_h = 0;
-	unitarizeLinks(*U, num_failures_d);
-	qudaDeviceSynchronize();
-	if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
-	
-	plaquette(*U);
+        printfQuda("Step %d\n", step);
+        Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
+
+        // Reunitarize gauge links...
+        *num_failures_h = 0;
+        unitarizeLinks(*U, num_failures_d);
+        qudaDeviceSynchronize();
+        if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+
+        plaquette(*U);
       }
       a1.Stop(__func__, __FILE__, __LINE__);
-      
+
       printfQuda("Time Monte -> %.6f s\n", a1.Last());
       plaq = plaquette(*U);
       printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      
+
       host_free(num_failures_h);
 
       // If a specific test type is requested, perfrom it now and then
       // turn off all Google tests in the tear down.
       switch (test_type) {
       case 0:
-	// Do the Google testing
-	break;
-      case 1:
-	run_ovr();
-	break;
-      case 2:
-	run_fft();
-	break;    
-      default:
-	errorQuda("Invalid test type %d ", test_type);
+        // Do the Google testing
+        break;
+      case 1: run_ovr(); break;
+      case 2: run_fft(); break;
+      default: errorQuda("Invalid test type %d ", test_type);
       }
     }
   }
-  
+
   virtual void TearDown()
   {
-    if(execute) {
+    if (execute) {
       detu = getLinkDeterminant(*U);
       double2 tru = getLinkTrace(*U);
       printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
       printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
-      
+
       delete U;
       // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
       PGaugeExchangeFree();
-      
+
       a0.Stop(__func__, __FILE__, __LINE__);
       printfQuda("Time -> %.6f s\n", a0.Last());
       randstates->Release();
-      delete randstates;      
+      delete randstates;
     }
     // If we performed a specific instance, switch off the
     // Google testing.
-    if(test_type != 0) execute = false;
+    if (test_type != 0) execute = false;
   }
-  
+
   virtual void run_ovr()
   {
-    if(execute) {
-      gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-		     gf_theta_condition);
+    if (execute) {
+      gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
+                     gf_reunit_interval, gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
       ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -266,30 +255,31 @@ class GaugeAlgTest : public ::testing::Test
   }
   virtual void run_fft()
   {
-    if(execute) {
+    if (execute) {
       if (!checkDimsPartitioned()) {
-	printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-	gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-		       gf_theta_condition);
-	
-	auto plaq_gf = plaquette(*U);	
-	printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-	ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-	// Save if output string is specified
-	if (strcmp(gauge_outfile, "")) save_gauge();
+        printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
+        gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+                       gf_theta_condition);
+
+        auto plaq_gf = plaquette(*U);
+        printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+        ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+        // Save if output string is specified
+        if (strcmp(gauge_outfile, "")) save_gauge();
       } else {
-	errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
+        errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
       }
     }
   }
 
-  virtual void save_gauge() {
-    
+  virtual void save_gauge()
+  {
+
     printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
-    
+
     QudaGaugeParam gauge_param = newQudaGaugeParam();
     setWilsonGaugeParam(gauge_param);
-    
+
     void *cpu_gauge[4];
     for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
 
@@ -300,25 +290,25 @@ class GaugeAlgTest : public ::testing::Test
     gParam.link_type = param.type;
     gParam.reconstruct = param.reconstruct;
     gParam.setPrecision(gParam.Precision(), true);
-  
+
     cudaGaugeField *gauge;
     gauge = new cudaGaugeField(gParam);
-    
+
     // copy into regular field
     copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
     saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
-      
+
     // Write to disk
     write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0);
-    
+
     for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]);
     delete gauge;
   }
 };
-  
+
 TEST_F(GaugeAlgTest, Generation)
 {
-  if(execute) {
+  if (execute) {
     detu = getLinkDeterminant(*U);
     ASSERT_TRUE(CheckDeterminant(detu));
   }
@@ -326,10 +316,10 @@ TEST_F(GaugeAlgTest, Generation)
 
 TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
-  if(execute) {
+  if (execute) {
     printfQuda("Landau gauge fixing with overrelaxation\n");
     gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-		   gf_theta_condition);
+                   gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -338,10 +328,10 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 
 TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
-  if(execute) {
+  if (execute) {
     printfQuda("Coulomb gauge fixing with overrelaxation\n");
     gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-		   gf_theta_condition);
+                   gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -350,11 +340,11 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 
 TEST_F(GaugeAlgTest, Landau_FFT)
 {
-  if(execute) {
+  if (execute) {
     if (!checkDimsPartitioned()) {
       printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
       gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-		     gf_theta_condition);
+                     gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
       ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -364,11 +354,11 @@ TEST_F(GaugeAlgTest, Landau_FFT)
 
 TEST_F(GaugeAlgTest, Coulomb_FFT)
 {
-  if(execute) {
+  if (execute) {
     if (!checkDimsPartitioned()) {
       printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
       gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-		     gf_theta_condition);
+                     gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
       ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -382,7 +372,7 @@ int main(int argc, char **argv)
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
-  
+
   test_type = 0;
   CLI::TransformPairs<int> test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}};
   app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
@@ -395,12 +385,12 @@ int main(int argc, char **argv)
   if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
   if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
 
-  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)  
+  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
 
   // call srand() with a rank-dependent seed
   initRand();
-  
+
   display_test_info();
 
   // initialize the QUDA library
@@ -415,9 +405,9 @@ int main(int argc, char **argv)
 
   // return code for google test
   int test_rc = RUN_ALL_TESTS();
-  
-  endQuda();  
+
+  endQuda();
   finalizeComms();
-  
+
   return test_rc;
 }

From 59c3beb0507301a3e466054b403c2365587bf656 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Thu, 5 Aug 2021 14:35:27 -0700
Subject: [PATCH 13/32] Rebase to GK

---
 lib/interface_quda.cpp    |   2 +-
 tests/gauge_alg_ctest.cpp | 330 +++++++++++++++--------------
 tests/gauge_alg_test.cpp  | 429 --------------------------------------
 3 files changed, 174 insertions(+), 587 deletions(-)
 delete mode 100644 tests/gauge_alg_test.cpp

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 7f75793180..73cdb76b45 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5545,7 +5545,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
   checkGaugeParam(param);
 
   profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT);
-  GaugeFieldParam gParam(gauge, *param);
+  GaugeFieldParam gParam(*param);
   auto *cpuGauge = new cpuGaugeField(gParam);
 
   // gParam.pad = getFatLinkPadding(param->X);
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index f797420c9c..0bfeaa8189 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -10,7 +10,10 @@
 #include <host_utils.h>
 #include <command_line_params.h>
 #include <misc.h>
+#include <timer.h>
+
 #include <gauge_tools.h>
+#include <tune_quda.h>
 
 #include <pgauge_monte.h>
 #include <random_quda.h>
@@ -31,6 +34,11 @@ using namespace quda;
 //***********************************************************//
 bool execute = true;
 
+bool gauge_load;
+bool gauge_store;
+
+void *host_gauge[4];
+
 void display_test_info()
 {
   printfQuda("running the following test:\n");
@@ -57,10 +65,10 @@ class GaugeAlgTest : public ::testing::Test
 protected:
   QudaGaugeParam param;
 
-  Timer a0, a1;
+  Timer<false> a0, a1;
   double2 detu;
   double3 plaq;
-  cudaGaugeField *U;
+  GaugeField *U;
   int nsteps;
   int nhbsteps;
   int novrsteps;
@@ -69,8 +77,6 @@ class GaugeAlgTest : public ::testing::Test
 
   bool unit_test;
 
-  RNG *randstates;
-
   void SetReunitarizationConsts()
   {
     const double unitarize_eps = 1e-14;
@@ -82,13 +88,6 @@ class GaugeAlgTest : public ::testing::Test
     setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error);
   }
 
-  bool checkDimsPartitioned()
-  {
-    if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
-      return true;
-    return false;
-  }
-
   bool comparePlaquette(double3 a, double3 b)
   {
     double a0, a1, a2;
@@ -96,14 +95,14 @@ class GaugeAlgTest : public ::testing::Test
     a1 = std::abs(a.y - b.y);
     a2 = std::abs(a.z - b.z);
     double prec_val = 1.0e-5;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance*1e2;
     return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
   }
 
   bool CheckDeterminant(double2 detu)
   {
     double prec_val = 5e-8;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val =  gf_tolerance*1e2;
     return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val);
   }
 
@@ -114,108 +113,111 @@ class GaugeAlgTest : public ::testing::Test
       param = newQudaGaugeParam();
 
       // Setup gauge container.
-      param.cpu_prec = prec;
-      param.cpu_prec = prec;
-      param.cuda_prec = prec;
-      param.reconstruct = link_recon;
-      param.cuda_prec_sloppy = prec;
-      param.reconstruct_sloppy = link_recon;
-
-      param.type = QUDA_WILSON_LINKS;
-      param.gauge_order = QUDA_MILC_GAUGE_ORDER;
-
-      param.X[0] = xdim;
-      param.X[1] = ydim;
-      param.X[2] = zdim;
-      param.X[3] = tdim;
-      setDims(param.X);
-
-      param.anisotropy = 1.0; // don't support anisotropy for now!!!!!!
+      setWilsonGaugeParam(param);
       param.t_boundary = QUDA_PERIODIC_T;
-      param.gauge_fix = QUDA_GAUGE_FIXED_NO;
-      param.ga_pad = 0;
-
-      GaugeFieldParam gParam(0, param);
-      gParam.pad = 0;
-      gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-      gParam.create = QUDA_NULL_FIELD_CREATE;
-      gParam.link_type = param.type;
-      gParam.reconstruct = param.reconstruct;
-      gParam.setPrecision(gParam.Precision(), true);
-
-#ifdef MULTI_GPU
-      int y[4];
-      int R[4] = {0, 0, 0, 0};
-      for (int dir = 0; dir < 4; ++dir)
-        if (comm_dim_partitioned(dir)) R[dir] = 2;
-      for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir];
-      int pad = 0;
-      GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED);
-      gParamEx.create = QUDA_ZERO_FIELD_CREATE;
-      gParamEx.order = gParam.order;
-      gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET;
-      gParamEx.t_boundary = gParam.t_boundary;
-      gParamEx.nFace = 1;
-      for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir];
-      U = new cudaGaugeField(gParamEx);
-#else
-      U = new cudaGaugeField(gParam);
-#endif
-      // CURAND random generator initialization
-      randstates = new RNG(gParam, 1234);
-      randstates->Init();
-
-      nsteps = heatbath_num_steps;
-      nhbsteps = heatbath_num_heatbath_per_step;
-      novrsteps = heatbath_num_overrelax_per_step;
-      coldstart = heatbath_coldstart;
-      beta_value = heatbath_beta_value;
-
-      a0.Start(__func__, __FILE__, __LINE__);
-      a1.Start(__func__, __FILE__, __LINE__);
 
+      // Reunitarization setup
       int *num_failures_h = (int *)mapped_malloc(sizeof(int));
       int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-
-      if (link_recon != QUDA_RECONSTRUCT_8 && coldstart)
-        InitGaugeField(*U);
-      else
-        InitGaugeField(*U, *randstates);
-
-      // Reunitarization setup
       SetReunitarizationConsts();
-      plaquette(*U);
-
-      for (int step = 1; step <= nsteps; ++step) {
-        printfQuda("Step %d\n", step);
-        Monte(*U, *randstates, beta_value, nhbsteps, novrsteps);
 
-        // Reunitarize gauge links...
-        *num_failures_h = 0;
-        unitarizeLinks(*U, num_failures_d);
-        qudaDeviceSynchronize();
-        if (*num_failures_h > 0) errorQuda("Error in the unitarization\n");
+      a0.start();
+
+      // If no field is loaded, create a physical quenched field on the device
+      if (!gauge_load) {
+        GaugeFieldParam gParam(param);
+        gParam.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED;
+        gParam.create = QUDA_NULL_FIELD_CREATE;
+        gParam.reconstruct = link_recon;
+        gParam.setPrecision(prec, true);
+        for (int d = 0; d < 4; d++) {
+          if (comm_dim_partitioned(d)) gParam.r[d] = 2;
+          gParam.x[d] += 2 * gParam.r[d];
+        }
+
+        U = new cudaGaugeField(gParam);
+
+        RNG randstates(*U, 1234);
+
+	nsteps = heatbath_num_steps;
+	nhbsteps = heatbath_num_heatbath_per_step;
+	novrsteps = heatbath_num_overrelax_per_step;
+	coldstart = heatbath_coldstart;
+	beta_value = heatbath_beta_value;
+	a1.start();
+
+	if (coldstart)
+	  InitGaugeField(*U);
+	else
+	  InitGaugeField(*U, randstates);
+
+	for (int step = 1; step <= nsteps; ++step) {
+	  printfQuda("Step %d\n", step);
+	  Monte(*U, randstates, beta_value, nhbsteps, novrsteps);
+
+	  // Reunitarization
+	  *num_failures_h = 0;
+	  unitarizeLinks(*U, num_failures_d);
+	  qudaDeviceSynchronize();
+	  if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h);
+	  plaq = plaquette(*U);
+	  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+	}
+
+	a1.stop();
+	printfQuda("Time Monte -> %.6f s\n", a1.last());
+      } else {
 
-        plaquette(*U);
+	// If a field is loaded, create a device field and copy
+	printfQuda("Copying gauge field from host\n");
+	param.location = QUDA_CPU_FIELD_LOCATION;
+	GaugeFieldParam gauge_field_param(param, host_gauge);
+	gauge_field_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+	GaugeField *host = GaugeField::Create(gauge_field_param);
+
+	// switch the parameters for creating the mirror precise cuda gauge field
+	gauge_field_param.create = QUDA_NULL_FIELD_CREATE;
+	gauge_field_param.reconstruct = param.reconstruct;
+	gauge_field_param.setPrecision(param.cuda_prec, true);
+
+	if (comm_partitioned()) {
+	  int R[4] = {0, 0, 0, 0};
+	  for (int d = 0; d < 4; d++) if (comm_dim_partitioned(d)) R[d] = 2;
+	  static TimeProfile GaugeFix("GaugeFix");
+	  cudaGaugeField *tmp = new cudaGaugeField(gauge_field_param);
+	  tmp->copy(*host);
+	  U = createExtendedGauge(*tmp, R, GaugeFix);
+	  delete tmp;
+	} else {
+	  U = new cudaGaugeField(gauge_field_param);
+	  U->copy(*host);
+	}
+
+	delete host;
+
+	// Reunitarization
+	*num_failures_h = 0;
+	unitarizeLinks(*U, num_failures_d);
+	qudaDeviceSynchronize();
+	if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h);
+
+	plaq = plaquette(*U);
+	printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       }
-      a1.Stop(__func__, __FILE__, __LINE__);
-
-      printfQuda("Time Monte -> %.6f s\n", a1.Last());
-      plaq = plaquette(*U);
-      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
 
-      host_free(num_failures_h);
 
       // If a specific test type is requested, perfrom it now and then
       // turn off all Google tests in the tear down.
       switch (test_type) {
       case 0:
-        // Do the Google testing
-        break;
+	// Do the Google testing
+	break;
       case 1: run_ovr(); break;
       case 2: run_fft(); break;
-      default: errorQuda("Invalid test type %d ", test_type);
+      default: errorQuda("Invalid test type %d", test_type);
       }
+
+      host_free(num_failures_h);
     }
   }
 
@@ -231,10 +233,8 @@ class GaugeAlgTest : public ::testing::Test
       // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
       PGaugeExchangeFree();
 
-      a0.Stop(__func__, __FILE__, __LINE__);
-      printfQuda("Time -> %.6f s\n", a0.Last());
-      randstates->Release();
-      delete randstates;
+      a0.stop();
+      printfQuda("Time -> %.6f s\n", a0.last());
     }
     // If we performed a specific instance, switch off the
     // Google testing.
@@ -247,25 +247,29 @@ class GaugeAlgTest : public ::testing::Test
       gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
                      gf_reunit_interval, gf_theta_condition);
       auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
       ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+      saveTuneCache();
       // Save if output string is specified
-      if (strcmp(gauge_outfile, "")) save_gauge();
+      if (gauge_store) save_gauge();
     }
   }
   virtual void run_fft()
   {
     if (execute) {
-      if (!checkDimsPartitioned()) {
+      if (!comm_partitioned()) {
         printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
         gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
                        gf_theta_condition);
 
         auto plaq_gf = plaquette(*U);
-        printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+	printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+	printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
         ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+	saveTuneCache();
         // Save if output string is specified
-        if (strcmp(gauge_outfile, "")) save_gauge();
+        if (gauge_store) save_gauge();
       } else {
         errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
       }
@@ -274,25 +278,22 @@ class GaugeAlgTest : public ::testing::Test
 
   virtual void save_gauge()
   {
-
     printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
 
     QudaGaugeParam gauge_param = newQudaGaugeParam();
     setWilsonGaugeParam(gauge_param);
 
     void *cpu_gauge[4];
-    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
+    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); }
 
-    GaugeFieldParam gParam(0, param);
-    gParam.pad = 0;
+    GaugeFieldParam gParam(param);
     gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
     gParam.create = QUDA_NULL_FIELD_CREATE;
     gParam.link_type = param.type;
     gParam.reconstruct = param.reconstruct;
     gParam.setPrecision(gParam.Precision(), true);
 
-    cudaGaugeField *gauge;
-    gauge = new cudaGaugeField(gParam);
+    cudaGaugeField *gauge = new cudaGaugeField(gParam);
 
     // copy into regular field
     copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
@@ -301,14 +302,15 @@ class GaugeAlgTest : public ::testing::Test
     // Write to disk
     write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0);
 
-    for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]);
+    for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]);
     delete gauge;
   }
 };
 
+
 TEST_F(GaugeAlgTest, Generation)
 {
-  if (execute) {
+  if (execute && !gauge_load) {
     detu = getLinkDeterminant(*U);
     ASSERT_TRUE(CheckDeterminant(detu));
   }
@@ -321,54 +323,47 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
     gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
                    gf_theta_condition);
     auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+    printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+    printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
     ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
+    saveTuneCache();
   }
 }
 
-TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
+bool checkDimsPartitioned()
 {
-  if (execute) {
-    printfQuda("Coulomb gauge fixing with overrelaxation\n");
-    gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                   gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-  }
+  if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
+    return true;
+  return false;
 }
 
-TEST_F(GaugeAlgTest, Landau_FFT)
+bool comparePlaquette(double3 a, double3 b)
 {
-  if (execute) {
-    if (!checkDimsPartitioned()) {
-      printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                     gf_theta_condition);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-    }
-  }
+  printfQuda("Plaq:    %.16e, %.16e, %.16e\n", a.x, a.y, a.z);
+  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);
+  double a0, a1, a2;
+  a0 = std::abs(a.x - b.x);
+  a1 = std::abs(a.y - b.y);
+  a2 = std::abs(a.z - b.z);
+  double prec_val = 1.0e-5;
+  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+  return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
 }
 
-TEST_F(GaugeAlgTest, Coulomb_FFT)
+bool checkDeterminant(double2 detu)
 {
-  if (execute) {
-    if (!checkDimsPartitioned()) {
-      printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                     gf_theta_condition);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-    }
-  }
+  printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y);
+  double prec_val = 5e-8;
+  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
+  return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val;
 }
 
 int main(int argc, char **argv)
 {
-  // command line options
+  // initialize google test, includes command line options
+  ::testing::InitGoogleTest(&argc, argv);
+
+  // command line options  
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
@@ -376,29 +371,44 @@ int main(int argc, char **argv)
   test_type = 0;
   CLI::TransformPairs<int> test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}};
   app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
+  
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
     return app->exit(e);
   }
 
+  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
+  initComms(argc, argv, gridsize_from_cmdline);
+
+  QudaGaugeParam gauge_param = newQudaGaugeParam();
   if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
   if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
 
-  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
-  initComms(argc, argv, gridsize_from_cmdline);
+  setWilsonGaugeParam(gauge_param);
+  setDims(gauge_param.X);
+
+  display_test_info();
+
+  gauge_load = strcmp(latfile, "");
+  gauge_store = strcmp(gauge_outfile, "");
+
+  // If we are passing a gauge field to the test, we must allocate host memory.
+  // If no gauge is passed, we generate a quenched field on the device.
+  if (gauge_load) {
+    printfQuda("Loading gauge field from host\n");
+    for (int dir = 0; dir < 4; dir++) {
+      host_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size);
+    }
+    constructHostGaugeField(host_gauge, gauge_param, argc, argv);
+  }
 
   // call srand() with a rank-dependent seed
   initRand();
 
-  display_test_info();
-
   // initialize the QUDA library
   initQuda(device_ordinal);
 
-  // initalize google test, includes command line options
-  ::testing::InitGoogleTest(&argc, argv);
-
   // Ensure gtest prints only from rank 0
   ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
   if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
@@ -406,8 +416,14 @@ int main(int argc, char **argv)
   // return code for google test
   int test_rc = RUN_ALL_TESTS();
 
+  if (gauge_load) {
+    // release memory
+    for (int dir = 0; dir < 4; dir++) host_free(host_gauge[dir]);
+  }
+
   endQuda();
-  finalizeComms();
 
-  return test_rc;
+  finalizeComms();
+  
+  return test_rc;  
 }
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
deleted file mode 100644
index 5728dd1141..0000000000
--- a/tests/gauge_alg_test.cpp
+++ /dev/null
@@ -1,429 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <quda.h>
-#include <quda_internal.h>
-#include <gauge_field.h>
-
-#include <comm_quda.h>
-#include <host_utils.h>
-#include <command_line_params.h>
-#include <misc.h>
-#include <timer.h>
-
-#include <gauge_tools.h>
-#include <tune_quda.h>
-
-#include <pgauge_monte.h>
-#include <random_quda.h>
-#include <unitarization_links.h>
-
-#include <qio_field.h>
-
-#include <gtest/gtest.h>
-
-using namespace quda;
-
-//***********************************************************//
-// This boolean controls whether or not the full Google test //
-// is done. If the user passes a value of 1 or 2 to --test   //
-// then a single instance of OVR or FFT gauge fixing is done //
-// and the value of this bool is set to false. Otherwise the //
-// Google tests are performed.                               //
-//***********************************************************//
-bool execute = true;
-
-bool gauge_load;
-bool gauge_store;
-
-void *host_gauge[4];
-
-void display_test_info()
-{
-  printfQuda("running the following test:\n");
-
-  switch (test_type) {
-  case 0: printfQuda("\n Google testing\n"); break;
-  case 1: printfQuda("\nOVR gauge fix\n"); break;
-  case 2: printfQuda("\nFFT gauge fix\n"); break;
-  default: errorQuda("Undefined test type %d given", test_type);
-  }
-
-  printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
-  printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
-             get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
-             tdim, Lsdim);
-
-  printfQuda("Grid partition info:     X  Y  Z  T\n");
-  printfQuda("                         %d  %d  %d  %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2),
-             dimPartitioned(3));
-}
-
-class GaugeAlgTest : public ::testing::Test
-{
-protected:
-  QudaGaugeParam param;
-
-  Timer<false> a0, a1;
-  double2 detu;
-  double3 plaq;
-  GaugeField *U;
-  int nsteps;
-  int nhbsteps;
-  int novrsteps;
-  bool coldstart;
-  double beta_value;
-
-  bool unit_test;
-
-  void SetReunitarizationConsts()
-  {
-    const double unitarize_eps = 1e-14;
-    const double max_error = 1e-10;
-    const int reunit_allow_svd = 1;
-    const int reunit_svd_only = 0;
-    const double svd_rel_error = 1e-6;
-    const double svd_abs_error = 1e-6;
-    setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error);
-  }
-
-  bool comparePlaquette(double3 a, double3 b)
-  {
-    double a0, a1, a2;
-    a0 = std::abs(a.x - b.x);
-    a1 = std::abs(a.y - b.y);
-    a2 = std::abs(a.z - b.z);
-    double prec_val = 1.0e-5;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance*1e2;
-    return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
-  }
-
-  bool CheckDeterminant(double2 detu)
-  {
-    double prec_val = 5e-8;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val =  gf_tolerance*1e2;
-    return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val);
-  }
-
-  virtual void SetUp()
-  {
-    if (execute) {
-      setVerbosity(QUDA_VERBOSE);
-      param = newQudaGaugeParam();
-
-      // Setup gauge container.
-      setWilsonGaugeParam(param);
-      param.t_boundary = QUDA_PERIODIC_T;
-
-      // Reunitarization setup
-      int *num_failures_h = (int *)mapped_malloc(sizeof(int));
-      int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
-      SetReunitarizationConsts();
-
-      a0.start();
-
-      // If no field is loaded, create a physical quenched field on the device
-      if (!gauge_load) {
-        GaugeFieldParam gParam(param);
-        gParam.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED;
-        gParam.create = QUDA_NULL_FIELD_CREATE;
-        gParam.reconstruct = link_recon;
-        gParam.setPrecision(prec, true);
-        for (int d = 0; d < 4; d++) {
-          if (comm_dim_partitioned(d)) gParam.r[d] = 2;
-          gParam.x[d] += 2 * gParam.r[d];
-        }
-
-        U = new cudaGaugeField(gParam);
-
-        RNG randstates(*U, 1234);
-
-	nsteps = heatbath_num_steps;
-	nhbsteps = heatbath_num_heatbath_per_step;
-	novrsteps = heatbath_num_overrelax_per_step;
-	coldstart = heatbath_coldstart;
-	beta_value = heatbath_beta_value;
-	a1.start();
-
-	if (coldstart)
-	  InitGaugeField(*U);
-	else
-	  InitGaugeField(*U, randstates);
-
-	for (int step = 1; step <= nsteps; ++step) {
-	  printfQuda("Step %d\n", step);
-	  Monte(*U, randstates, beta_value, nhbsteps, novrsteps);
-
-	  // Reunitarization
-	  *num_failures_h = 0;
-	  unitarizeLinks(*U, num_failures_d);
-	  qudaDeviceSynchronize();
-	  if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h);
-	  plaq = plaquette(*U);
-	  printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-	}
-
-	a1.stop();
-	printfQuda("Time Monte -> %.6f s\n", a1.last());
-      } else {
-
-	// If a field is loaded, create a device field and copy
-	printfQuda("Copying gauge field from host\n");
-	param.location = QUDA_CPU_FIELD_LOCATION;
-	GaugeFieldParam gauge_field_param(param, host_gauge);
-	gauge_field_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-	GaugeField *host = GaugeField::Create(gauge_field_param);
-
-	// switch the parameters for creating the mirror precise cuda gauge field
-	gauge_field_param.create = QUDA_NULL_FIELD_CREATE;
-	gauge_field_param.reconstruct = param.reconstruct;
-	gauge_field_param.setPrecision(param.cuda_prec, true);
-
-	if (comm_partitioned()) {
-	  int R[4] = {0, 0, 0, 0};
-	  for (int d = 0; d < 4; d++) if (comm_dim_partitioned(d)) R[d] = 2;
-	  static TimeProfile GaugeFix("GaugeFix");
-	  cudaGaugeField *tmp = new cudaGaugeField(gauge_field_param);
-	  tmp->copy(*host);
-	  U = createExtendedGauge(*tmp, R, GaugeFix);
-	  delete tmp;
-	} else {
-	  U = new cudaGaugeField(gauge_field_param);
-	  U->copy(*host);
-	}
-
-	delete host;
-
-	// Reunitarization
-	*num_failures_h = 0;
-	unitarizeLinks(*U, num_failures_d);
-	qudaDeviceSynchronize();
-	if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h);
-
-	plaq = plaquette(*U);
-	printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      }
-
-
-      // If a specific test type is requested, perfrom it now and then
-      // turn off all Google tests in the tear down.
-      switch (test_type) {
-      case 0:
-	// Do the Google testing
-	break;
-      case 1: run_ovr(); break;
-      case 2: run_fft(); break;
-      default: errorQuda("Invalid test type %d", test_type);
-      }
-
-      host_free(num_failures_h);
-    }
-  }
-
-  virtual void TearDown()
-  {
-    if (execute) {
-      detu = getLinkDeterminant(*U);
-      double2 tru = getLinkTrace(*U);
-      printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
-      printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
-
-      delete U;
-      // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
-      PGaugeExchangeFree();
-
-      a0.stop();
-      printfQuda("Time -> %.6f s\n", a0.last());
-    }
-    // If we performed a specific instance, switch off the
-    // Google testing.
-    if (test_type != 0) execute = false;
-  }
-
-  virtual void run_ovr()
-  {
-    if (execute) {
-      gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
-                     gf_reunit_interval, gf_theta_condition);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-      saveTuneCache();
-      // Save if output string is specified
-      if (gauge_store) save_gauge();
-    }
-  }
-  virtual void run_fft()
-  {
-    if (execute) {
-      if (!comm_partitioned()) {
-        printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-        gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                       gf_theta_condition);
-
-        auto plaq_gf = plaquette(*U);
-	printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-	printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-        ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-	saveTuneCache();
-        // Save if output string is specified
-        if (gauge_store) save_gauge();
-      } else {
-        errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
-      }
-    }
-  }
-
-  virtual void save_gauge()
-  {
-    printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
-
-    QudaGaugeParam gauge_param = newQudaGaugeParam();
-    setWilsonGaugeParam(gauge_param);
-
-    void *cpu_gauge[4];
-    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); }
-
-    GaugeFieldParam gParam(param);
-    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-    gParam.create = QUDA_NULL_FIELD_CREATE;
-    gParam.link_type = param.type;
-    gParam.reconstruct = param.reconstruct;
-    gParam.setPrecision(gParam.Precision(), true);
-
-    cudaGaugeField *gauge = new cudaGaugeField(gParam);
-
-    // copy into regular field
-    copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
-    saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
-
-    // Write to disk
-    write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0);
-
-    for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]);
-    delete gauge;
-  }
-};
-
-
-TEST_F(GaugeAlgTest, Generation)
-{
-  if (execute && !gauge_load) {
-    detu = getLinkDeterminant(*U);
-    ASSERT_TRUE(CheckDeterminant(detu));
-  }
-}
-
-TEST_F(GaugeAlgTest, Landau_Overrelaxation)
-{
-  if (execute) {
-    printfQuda("Landau gauge fixing with overrelaxation\n");
-    gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                   gf_theta_condition);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-    printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-    saveTuneCache();
-  }
-}
-
-bool checkDimsPartitioned()
-{
-  if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3))
-    return true;
-  return false;
-}
-
-bool comparePlaquette(double3 a, double3 b)
-{
-  printfQuda("Plaq:    %.16e, %.16e, %.16e\n", a.x, a.y, a.z);
-  printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z);
-  double a0, a1, a2;
-  a0 = std::abs(a.x - b.x);
-  a1 = std::abs(a.y - b.y);
-  a2 = std::abs(a.z - b.z);
-  double prec_val = 1.0e-5;
-  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-  return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
-}
-
-bool checkDeterminant(double2 detu)
-{
-  printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y);
-  double prec_val = 5e-8;
-  if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15;
-  return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val;
-}
-
-int main(int argc, char **argv)
-{
-  // initalize google test, includes command line options
-  ::testing::InitGoogleTest(&argc, argv);
-
-  // command line options  
-  auto app = make_app();
-  add_gaugefix_option_group(app);
-  add_heatbath_option_group(app);
-
-  test_type = 0;
-  CLI::TransformPairs<int> test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}};
-  app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
-  
-  try {
-    app->parse(argc, argv);
-  } catch (const CLI::ParseError &e) {
-    return app->exit(e);
-  }
-
-  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
-  initComms(argc, argv, gridsize_from_cmdline);
-
-  QudaGaugeParam gauge_param = newQudaGaugeParam();
-  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
-  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
-
-  setWilsonGaugeParam(gauge_param);
-  setDims(gauge_param.X);
-
-  display_test_info();
-
-  gauge_load = strcmp(latfile, "");
-  gauge_store = strcmp(gauge_outfile, "");
-
-  // If we are passing a gauge field to the test, we must allocate host memory.
-  // If no gauge is passed, we generate a quenched field on the device.
-  if (gauge_load) {
-    printfQuda("Loading gauge field from host\n");
-    for (int dir = 0; dir < 4; dir++) {
-      host_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size);
-    }
-    constructHostGaugeField(host_gauge, gauge_param, argc, argv);
-  }
-
-  // call srand() with a rank-dependent seed
-  initRand();
-
-  // initialize the QUDA library
-  initQuda(device_ordinal);
-
-  // Ensure gtest prints only from rank 0
-  ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
-  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
-
-  // return code for google test
-  int test_rc = RUN_ALL_TESTS();
-
-  if (gauge_load) {
-    // release memory
-    for (int dir = 0; dir < 4; dir++) host_free(host_gauge[dir]);
-  }
-
-  endQuda();
-
-  finalizeComms()
-  
-  return test_rc;  
-}

From dfb26685cdf9bf2cb13b473a565357a569e3975b Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Thu, 5 Aug 2021 14:53:41 -0700
Subject: [PATCH 14/32] Add gauge param argument

---
 lib/interface_quda.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 73cdb76b45..5e6848bd79 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5545,7 +5545,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u
   checkGaugeParam(param);
 
   profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT);
-  GaugeFieldParam gParam(*param);
+  GaugeFieldParam gParam(*param, gauge);
   auto *cpuGauge = new cpuGaugeField(gParam);
 
   // gParam.pad = getFatLinkPadding(param->X);

From bb9c0de72f285c002fb140f9a2bae100d80f3cd6 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Mon, 11 Oct 2021 11:35:12 -0700
Subject: [PATCH 15/32] Make gf_fft_autotune default to true

---
 tests/gauge_alg_ctest.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index 9902daab8f..fb810385ba 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -69,7 +69,7 @@ double gf_fft_alpha = 0.8;
 int gf_reunit_interval = 10;
 double gf_tolerance = 1e-6;
 bool gf_theta_condition = false;
-bool gf_fft_autotune = false;
+bool gf_fft_autotune = true;
 
 void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
 {

From 82658431f35097db72e5f86ea6733d2db1a853b0 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Tue, 12 Oct 2021 14:20:15 -0700
Subject: [PATCH 16/32] Always enable alpha autotuning in FFT gauge fixing
 (remove the --gf-fft-autotune option), add comments where appropriate. Fix
 gauge dir error in gauge_alg_test, adjust step output in gauge fixing

---
 include/quda.h            |  2 +-
 lib/gauge_fix_fft.cu      | 24 ++++++++++++++++++------
 lib/gauge_fix_ovr.cu      |  6 +++---
 lib/interface_quda.cpp    | 10 ++++++++--
 tests/gauge_alg_ctest.cpp | 37 ++++++++++++++++++++++++++-----------
 5 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/include/quda.h b/include/quda.h
index bf9bbe64c3..4b0e8c5917 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -1525,7 +1525,7 @@ extern "C" {
    * @param[in] Nsteps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
+   * @param[in] autotune (legacy), 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value. We hardocde this to 1 to ensure optimal behaviour. Instructions on how the user may regain control of this parameter are located in comments in lib/gauge_fix_fft.cu
    * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when
    * iteration reachs the maximum number of steps defined by Nsteps
    * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu
index 54c7ab8845..c92f80a242 100644
--- a/lib/gauge_fix_fft.cu
+++ b/lib/gauge_fix_fft.cu
@@ -185,12 +185,24 @@ namespace quda {
   void gaugeFixingFFT(GaugeField& data, int Nsteps, int verbose_interval,
                       double alpha0, int autotune, double tolerance, int stopWtheta)
   {
+    // We hardcode the value of autotune to 1 at this point as it is the last
+    // point before computation begins. This ensures that the the user cannot
+    // override alpha autotuning. This is done because it is very easy for the
+    // FFT gauge fixing to fail with a poorly chosen value of alpha, but
+    // autotuning ensures optimal behaviour.
+    // Users who wish to change this behaviour may remove the follwing line
+    // of code and recompile to regain control of alpha autotuning.
+    autotune = 1;
+    
     TimeProfile profileInternalGaugeFixFFT("InternalGaugeFixQudaFFT", false);
 
     profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
 
     if (getVerbosity() >= QUDA_SUMMARIZE) {
-      printfQuda("\tAuto tune active: %s\n", autotune ? "true" : "false");      
+      if(autotune == 1) printfQuda("\tAuto tune active: alpha will be adjusted as the algorithm progresses\n");
+      else if(autotune == 0) printfQuda("\tAuto tune not active: alpha will remain constant as the algorithm progresses\n");
+      else errorQuda("Unknown value of autotune = %d", autotune);
+      
       printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha0);
       printfQuda("\tTolerance: %e\n", tolerance);
       printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta");
@@ -217,7 +229,7 @@ namespace quda {
     GaugeFixQuality<decltype(argQ)> gfixquality(argQ, data);
     gfixquality.apply(device::get_default_stream());
     double action0 = argQ.getAction();
-    if(getVerbosity() >= QUDA_SUMMARIZE) printf("Step: %d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta());
+    if(getVerbosity() >= QUDA_SUMMARIZE) printf("Step: %05d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta());
 
     double diff = 0.0;
     int iter = 0;
@@ -285,11 +297,11 @@ namespace quda {
       double action = argQ.getAction();
       diff = abs(action0 - action);
       if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_SUMMARIZE)
-        printf("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
+        printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
       if ( autotune && ((action - action0) < -1e-14) ) {
         if ( arg.alpha > 0.01 ) {
           arg.alpha = 0.95 * arg.alpha;
-          if(getVerbosity() >= QUDA_SUMMARIZE) printf(">>>>>>>>>>>>>> Warning: changing alpha down -> %.4e\n", arg.alpha);
+          if(getVerbosity() >= QUDA_SUMMARIZE) printf("Changing alpha down -> %.4e\n", arg.alpha);
         }
       }
       //------------------------------------------------------------------------
@@ -301,7 +313,7 @@ namespace quda {
       action0 = action;
     }
     if ((iter % verbose_interval) != 0 && getVerbosity() >= QUDA_SUMMARIZE)
-      printf("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter, argQ.getAction(), argQ.getTheta(), diff);
+      printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter, argQ.getAction(), argQ.getTheta(), diff);
     
     // Reunitarize at end
     const double unitarize_eps = 1e-14;
@@ -382,7 +394,7 @@ namespace quda {
    * @param[in] Nsteps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
+   * @param[in] autotune (legacy), 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value. We hardcode this to true.
    * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
    * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
    */
diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu
index b97772f43d..93a3521530 100644
--- a/lib/gauge_fix_ovr.cu
+++ b/lib/gauge_fix_ovr.cu
@@ -313,7 +313,7 @@ namespace quda {
     flop += (double)GaugeFixQuality.flops();
     byte += (double)GaugeFixQuality.bytes();
     double action0 = argQ.getAction();
-    if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta());
+    if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta());
 
     *num_failures_h = 0;
     unitarizeLinks(data, data, num_failures_d);
@@ -413,7 +413,7 @@ namespace quda {
       double action = argQ.getAction();
       double diff = abs(action0 - action);
       if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_VERBOSE)
-        printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
+        printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
       if (stopWtheta) {
         if (argQ.getTheta() < tolerance) break;
       } else {
@@ -436,7 +436,7 @@ namespace quda {
       byte += (double)GaugeFixQuality.bytes();
       double action = argQ.getAction();
       double diff = abs(action0 - action);
-      if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
+      if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
     }
 
     for (int i = 0; i < 2 && nlinksfaces; i++) managed_free(borderpoints[i]);
diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 0c5ff3b8df..a99373452c 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5652,10 +5652,16 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const
   // perform the update
   profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
 
+  // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu)
+  // This ensures that the user can not override alpha autotuning. This is done because
+  // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of
+  // alpha, but autotuning alpha ensures optimal behaviour.
+  // Users who wish to change this behaviour may read the comment in
+  // lib/gauge_fix_fft.cu to regain control.
   gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
-
+  
   profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE);
-
+  
   // copy the gauge field back to the host
   profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index fb810385ba..4ea6a7fbc4 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -69,7 +69,6 @@ double gf_fft_alpha = 0.8;
 int gf_reunit_interval = 10;
 double gf_tolerance = 1e-6;
 bool gf_theta_condition = false;
-bool gf_fft_autotune = true;
 
 void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
 {
@@ -90,9 +89,6 @@ void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
   opgroup->add_option(
     "--gf-theta-condition", gf_theta_condition,
     "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
-  opgroup->add_option(
-    "--gf-fft-autotune", gf_fft_autotune,
-    "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)");
 }
 
 class GaugeAlgTest : public ::testing::Test {
@@ -285,6 +281,7 @@ class GaugeAlgTest : public ::testing::Test {
   virtual void run_ovr()
   {
     if (execute) {
+      printfQuda("%s gauge fixing with overrelaxation method\n",  gf_gauge_dir == 4 ? "Landau" : "Coulomb");
       gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
                      gf_reunit_interval, gf_theta_condition);
       auto plaq_gf = plaquette(*U);
@@ -300,8 +297,14 @@ class GaugeAlgTest : public ::testing::Test {
   {
     if (execute) {
       if (!checkDimsPartitioned()) {
-        printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-        gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+        printfQuda("%s gauge fixing with steepest descent method with FFT\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
+	// We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu)
+	// This ensures that the user can not override alpha autotuning. This is done because
+	// it is very easy for the FFT gauge fixing to fail with a poorly chosen value of
+	// alpha, but autotuning alpha ensures optimal behaviour.
+	// Users who wish to change this behaviour may read the comment in
+	// lib/gauge_fix_fft.cu to regain control.
+	gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance,
                        gf_theta_condition);
 
         auto plaq_gf = plaquette(*U);
@@ -389,8 +392,14 @@ TEST_F(GaugeAlgTest, Landau_FFT)
 {
   if (execute) {
     if (!comm_partitioned()) {
-      printfQuda("Landau gauge fixing with steepest descent method with FFTs\n");
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+      printfQuda("Landau gauge fixing with steepest descent method with FFT\n");
+      // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu)
+      // This ensures that the user can not override alpha autotuning. This is done because
+      // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of
+      // alpha, but autotuning alpha ensures optimal behaviour.
+      // Users who wish to change this behaviour may read the comment in
+      // lib/gauge_fix_fft.cu to regain control.
+      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance,
                      gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
@@ -405,10 +414,16 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
 {
   if (execute) {
     if (!comm_partitioned()) {
-      printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n");
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+      printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n");
+      // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu)
+      // This ensures that the user can not override alpha autotuning. This is done because
+      // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of
+      // alpha, but autotuning alpha ensures optimal behaviour.
+      // Users who wish to change this behaviour may read the comment in
+      // lib/gauge_fix_fft.cu to regain control.
+      gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance,
                      gf_theta_condition);
-      auto plaq_gf = plaquette(*U);
+auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
       ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));

From 040be93004b847b2092280c1b79016904943a511 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 29 Oct 2021 12:21:46 -0700
Subject: [PATCH 17/32] Revert MPI test params

---
 tests/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 764cc6af97..4beabb3d81 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -812,8 +812,8 @@ foreach(prec IN LISTS TEST_PRECS)
 
   if(QUDA_GAUGE_ALG)
     add_test(NAME gauge_alg_${prec}
-             COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg ctest> ${MPIEXEC_POSTFLAGS}
-                     --dim 2 4 6 8 --prec ${prec}
+             COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg_ctest> ${MPIEXEC_POSTFLAGS}
+                     --dim 4 6 8 10 --prec ${prec}
                      --gtest_output=xml:gauge_arg_test_${prec}.xml)
   endif()
 

From bb91ecd531bc6c0f433707cf56f1ea2e8932433a Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 29 Oct 2021 12:54:52 -0700
Subject: [PATCH 18/32] Use device_timer instead of Timer

---
 tests/gauge_alg_ctest.cpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index 4ea6a7fbc4..4afa0598ce 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -95,8 +95,8 @@ class GaugeAlgTest : public ::testing::Test {
 
 protected:
   QudaGaugeParam param;
-  
-  Timer<false> a0,a1;
+
+  device_timer_t device_timer_1, device_timer_2;
   double2 detu;
   double3 plaq;
   cudaGaugeField *U;
@@ -158,7 +158,7 @@ class GaugeAlgTest : public ::testing::Test {
       int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
       SetReunitarizationConsts();
 
-      a0.start();
+      device_timer_1.start();
 
       // If no field is loaded, create a physical quenched field on the device
       if (!gauge_load) {
@@ -181,7 +181,7 @@ class GaugeAlgTest : public ::testing::Test {
         novrsteps = heatbath_num_overrelax_per_step;
         coldstart = heatbath_coldstart;
         beta_value = heatbath_beta_value;
-        a1.start();
+        device_timer_2.start();
 
         if (coldstart)
           InitGaugeField(*U);
@@ -201,8 +201,8 @@ class GaugeAlgTest : public ::testing::Test {
           printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
         }
 
-        a1.stop();
-        printfQuda("Time Monte -> %.6f s\n", a1.last());
+        device_timer_2.stop();
+        printfQuda("Time Monte -> %.6f s\n", device_timer_2.last());
       } else {
 
         // If a field is loaded, create a device field and copy
@@ -270,8 +270,8 @@ class GaugeAlgTest : public ::testing::Test {
       // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
       PGaugeExchangeFree();
 
-      a0.stop(__func__, __FILE__, __LINE__);
-      printfQuda("Time -> %.6f s\n", a0.last());
+      device_timer_1.stop();
+      printfQuda("Time -> %.6f s\n", device_timer_1.last());
     }
     // If we performed a specific instance, switch off the
     // Google testing.
@@ -328,7 +328,7 @@ class GaugeAlgTest : public ::testing::Test {
     setWilsonGaugeParam(gauge_param);
 
     void *cpu_gauge[4];
-    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); }
+    for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); }
 
     GaugeFieldParam gParam(param);
     gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;

From f2558fea690c8ce414ea8a1ec9d0d8ee6b03ad51 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 29 Oct 2021 16:41:19 -0700
Subject: [PATCH 19/32] Remove extra semi-colon

---
 tests/su3_test.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp
index a5b1b83afb..9f166b6ca6 100644
--- a/tests/su3_test.cpp
+++ b/tests/su3_test.cpp
@@ -94,8 +94,7 @@ void add_su3_option_group(std::shared_ptr<QUDAApp> quda_app)
 
   opgroup->add_option("--su3-wflow-type", wflow_type, "The type of action to use in the wilson flow (default wilson)")
     ->transform(CLI::QUDACheckedTransformer(wflow_type_map));
-  ;
-
+  
   opgroup->add_option("--su3-measurement-interval", measurement_interval,
                       "Measure the field energy and topological charge every Nth step (default 5) ");
 }

From 4f98cc54dd1f853f4473686af03e5520bba7d5d0 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 29 Oct 2021 16:53:46 -0700
Subject: [PATCH 20/32] Consolidate the gauge fixing interface, use new
 parameter struct QudaGaugeFixParam to define both parameters and algorithm,
 format variable names to be consistent, use QudaBoolean for booleans rather
 than integers

---
 include/enum_quda.h         |   6 ++
 include/enum_quda_fortran.h |  11 +++
 include/gauge_tools.h       |  22 ++---
 include/quda.h              |  73 ++++++++-------
 lib/check_params.h          |  49 +++++++++-
 lib/gauge_fix_fft.cu        |  58 +++++-------
 lib/gauge_fix_ovr.cu        |  34 +++----
 lib/interface_quda.cpp      | 178 ++++++++++++------------------------
 lib/milc_interface.cpp      |  33 +++++--
 tests/gauge_alg_ctest.cpp   |  46 ++++------
 10 files changed, 255 insertions(+), 255 deletions(-)

diff --git a/include/enum_quda.h b/include/enum_quda.h
index 15354a1de7..fee22c5169 100644
--- a/include/enum_quda.h
+++ b/include/enum_quda.h
@@ -550,6 +550,12 @@ typedef enum QudaWFlowType_s {
   QUDA_WFLOW_TYPE_INVALID = QUDA_INVALID_ENUM
 } QudaWFlowType;
 
+typedef enum QudaGaugeFixType_s {
+  QUDA_GAUGEFIX_TYPE_OVR = 0,
+  QUDA_GAUGEFIX_TYPE_FFT = 1,
+  QUDA_GAUGEFIX_TYPE_INVALID = QUDA_INVALID_ENUM
+} QudaGaugeFixType;
+
 // Allows to choose an appropriate external library
 typedef enum QudaExtLibType_s {
   QUDA_CUSOLVE_EXTLIB,
diff --git a/include/enum_quda_fortran.h b/include/enum_quda_fortran.h
index e810b1631a..7614e2bbdc 100644
--- a/include/enum_quda_fortran.h
+++ b/include/enum_quda_fortran.h
@@ -493,6 +493,17 @@
 #define QUDA_CONTRACT_GAMMA_S34 15
 #define QUDA_CONTRACT_GAMMA_INVALID QUDA_INVALID_ENUM
 
+#define QudaWFlowType integer(4)
+#define QUDA_WFLOW_TYPE_WILSON 0
+#define QUDA_WFLOW_TYPE_SYMANZIK 1
+#define QUDA_WFLOW_TYPE_INVALID QUDA_INVALID_ENUM
+
+#define QudaGaugeFixType integer(4)
+#define QUDA_GAUGEFIX_TYPE_OVR 0
+#define QUDA_GAUGEFIX_TYPE_FFT 1
+#define QUDA_GAUGEFIX_TYPE_INVALID QUDA_INVALID_ENUM
+
+
 #define QudaExtLibType integer(4)
 #define QUDA_CUSOLVE_EXTLIB 0
 #define QUDA_EIGEN_EXTLIB 1
diff --git a/include/gauge_tools.h b/include/gauge_tools.h
index e32f38f5b1..25b1691195 100644
--- a/include/gauge_tools.h
+++ b/include/gauge_tools.h
@@ -114,33 +114,33 @@ namespace quda
    * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
    * @param[in,out] data, quda gauge field
    * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
+   * @param[in] steps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
    * @param[in] tolerance, torelance value to stop the method, if this
    * value is zero then the method stops when iteration reachs the
-   * maximum number of steps defined by Nsteps
+   * maximum number of steps defined by steps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
+   * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
    */
-  void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval,
-                      const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta);
+  void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval,
+                      const double relax_boost, const double tolerance, const int reunit_interval, const QudaBoolean theta_condition);
 
   /**
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
    * @param[in,out] data, quda gauge field
    * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
+   * @param[in] steps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
+   * @param[in] autotune, QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we decrease the alpha value
    * @param[in] tolerance, torelance value to stop the method, if this
    * value is zero then the method stops when iteration reachs the
-   * maximum number of steps defined by Nsteps
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
+   * maximum number of steps defined by steps
+   * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
    */
-  void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval,
-                      const double alpha, const int autotune, const double tolerance, const int stopWtheta);
+  void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval,
+                      const double alpha, const QudaBoolean autotune, const double tolerance, const QudaBoolean theta_condition);
 
   /**
      @brief Compute the Fmunu tensor
diff --git a/include/quda.h b/include/quda.h
index 419bd1febe..3401cc4f67 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -779,6 +779,22 @@ extern "C" {
     QudaBLASDataOrder data_order; /**< Specifies if using Row or Column major */
   } QudaBLASParam;
 
+  typedef struct QudaGaugeFixParam_s {
+    size_t struct_size; /**< Size of this struct in bytes.  Used to ensure that the host application and QUDA see the same struct size */
+
+    QudaGaugeFixType fix_type;   /**< The aglorithm to use for gauge fixing */
+    int gauge_dir;               /**< The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4) */
+    int maxiter;                 /**< The maximun number of gauge fixing iterations to be applied (default 10000) */
+    int verbosity_interval;      /**< Print the gauge fixing progress every N steps (default 100) */
+    double ovr_relaxation_boost; /**< The overrelaxation boost parameter for the overrelaxation method (default 1.5) */
+    double fft_alpha;            /**< The Alpha parameter in the FFT method (default 0.8) */
+    QudaBoolean fft_autotune;    /**< Autotune the Alpha parameter in the FFT method (default true) */
+    int reunit_interval;         /**< Reunitarise the gauge field every N steps (default 10) */
+    double tolerance;            /**< The tolerance of the gauge fixing quality (default 1e-6) */
+    QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)" */
+  } QudaGaugeFixParam;
+
+  
   /*
    * Interface functions, found in interface_quda.cpp
    */
@@ -955,6 +971,15 @@ extern "C" {
    */
   QudaBLASParam newQudaBLASParam(void);
 
+  /**
+   * A new QudaGaugeFixParam should always be initialized immediately
+   * after it's defined (and prior to explicitly setting its members)
+   * using this function.  Typical usage is as follows:
+   *
+   *   QudaGaugeFixParam fix_param = newQudaGaugeFixParam();
+   */
+  QudaGaugeFixParam newQudaGaugeFixParam(void);
+
   /**
    * Print the members of QudaGaugeParam.
    * @param param The QudaGaugeParam whose elements we are to print.
@@ -991,6 +1016,12 @@ extern "C" {
    */
   void printQudaBLASParam(QudaBLASParam *param);
 
+  /**
+   * Print the members of QudaGaugeFixParam.
+   * @param param The QudaGaugeFixParam whose elements we are to print.
+   */
+  void printQudaGaugeFixParam(QudaGaugeFixParam *param);
+  
   /**
    * Load the gauge field from the host.
    * @param h_gauge Base pointer to host gauge field (regardless of dimensionality)
@@ -1505,42 +1536,14 @@ extern "C" {
                     const int *X);
 
   /**
-   * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
+   * @brief Gauge fixing with overrelaxation with support for single and multi GPU, and steepest descent FFT with support for single GPU only.
    * @param[in,out] gauge, gauge field to be fixed
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when
-   * iteration reachs the maximum number of steps defined by Nsteps
-   * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
-   * @param[in] param The parameters of the external fields and the computation settings
-   * @param[out] timeinfo
-   */
-  int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps,
-                                const unsigned int verbose_interval, const double relax_boost, const double tolerance,
-                                const unsigned int reunit_interval, const unsigned int stopWtheta,
-                                QudaGaugeParam *param, double *timeinfo);
-  /**
-   * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
-   * @param[in,out] gauge, gauge field to be fixed
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune (legacy), 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value. We hardocde this to 1 to ensure optimal behaviour. Instructions on how the user may regain control of this parameter are located in comments in lib/gauge_fix_fft.cu
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when
-   * iteration reachs the maximum number of steps defined by Nsteps
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
-   * @param[in] param The parameters of the external fields and the computation settings
-   * @param[out] timeinfo
-   */
-  int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps,
-                                const unsigned int verbose_interval, const double alpha, const unsigned int autotune,
-                                const double tolerance, const unsigned int stopWtheta, QudaGaugeParam *param,
-                                double *timeinfo);
-
+   * @param[in] gauge_param The parameters of the external fields and the computation settings
+   * @param[in] fix_param Container for the gauge fixing algorithm and parameters to use.
+   * @param[out] timeinfo Array to track timings 
+   */
+  int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *gauge_param, QudaGaugeFixParam *fix_param, double *timeinfo);
+  
   /**
    * @brief Strided Batched GEMM
    * @param[in] arrayA The array containing the A matrix data
diff --git a/lib/check_params.h b/lib/check_params.h
index f0784ada1b..28e8370fa6 100644
--- a/lib/check_params.h
+++ b/lib/check_params.h
@@ -1045,8 +1045,55 @@ void printQudaBLASParam(QudaBLASParam *param)
 #endif
 }
 
-// clean up
+#if defined INIT_PARAM
+QudaGaugeFixParam newQudaGaugeFixParam(void)
+{
+  QudaGaugeFixParam ret;
+#elif defined CHECK_PARAM
+static void checkGaugeFixParam(QudaGaugeFixParam *param)
+{
+#else
+void printQudaGaugeFixParam(QudaGaugeFixParam *param)
+{
+  printfQuda("QUDA gauge fix parameters:\n");
+#endif
 
+#if defined CHECK_PARAM
+  if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param))
+    errorQuda("Unexpected QudaGaugeFixParam struct size %lu, expected %lu", param->struct_size, sizeof(*param));
+#else
+  P(struct_size, (size_t)INVALID_INT);
+#endif
+
+#ifdef INIT_PARAM
+  P(gauge_dir, 4);
+  P(maxiter, 10000);
+  P(verbosity_interval, 100);
+  P(reunit_interval, 10);
+  P(ovr_relaxation_boost, 0.0);
+  P(fft_alpha, 0.0);
+  P(tolerance, 0.0);
+  P(fft_autotune, QUDA_BOOLEAN_FALSE);
+  P(theta_condition, QUDA_BOOLEAN_FALSE);
+#else
+  P(gauge_dir, INVALID_INT);
+  P(maxiter, INVALID_INT);
+  P(verbosity_interval, INVALID_INT);
+  P(reunit_interval, INVALID_INT);
+  P(ovr_relaxation_boost, INVALID_DOUBLE);
+  P(fft_alpha, INVALID_DOUBLE);
+  P(tolerance, INVALID_DOUBLE);
+  P(fft_autotune, QUDA_BOOLEAN_FALSE);
+  P(theta_condition, QUDA_BOOLEAN_FALSE);
+#endif
+
+#ifdef INIT_PARAM
+  return ret;
+#endif
+}
+
+
+// clean up
 #undef INVALID_INT
 #undef INVALID_DOUBLE
 #undef P
diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu
index c92f80a242..74e0849d24 100644
--- a/lib/gauge_fix_fft.cu
+++ b/lib/gauge_fix_fft.cu
@@ -182,31 +182,20 @@ namespace quda {
   };
 
   template <typename Float, QudaReconstructType recon, int gauge_dir>
-  void gaugeFixingFFT(GaugeField& data, int Nsteps, int verbose_interval,
-                      double alpha0, int autotune, double tolerance, int stopWtheta)
+  void gaugeFixingFFT(GaugeField& data, int steps, int verbose_interval,
+                      double alpha0, QudaBoolean autotune, double tolerance, QudaBoolean theta_condition)
   {
-    // We hardcode the value of autotune to 1 at this point as it is the last
-    // point before computation begins. This ensures that the the user cannot
-    // override alpha autotuning. This is done because it is very easy for the
-    // FFT gauge fixing to fail with a poorly chosen value of alpha, but
-    // autotuning ensures optimal behaviour.
-    // Users who wish to change this behaviour may remove the follwing line
-    // of code and recompile to regain control of alpha autotuning.
-    autotune = 1;
-    
     TimeProfile profileInternalGaugeFixFFT("InternalGaugeFixQudaFFT", false);
 
     profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
 
     if (getVerbosity() >= QUDA_SUMMARIZE) {
-      if(autotune == 1) printfQuda("\tAuto tune active: alpha will be adjusted as the algorithm progresses\n");
-      else if(autotune == 0) printfQuda("\tAuto tune not active: alpha will remain constant as the algorithm progresses\n");
-      else errorQuda("Unknown value of autotune = %d", autotune);
-      
+      if(autotune == QUDA_BOOLEAN_TRUE) printfQuda("\tAuto tune active: alpha will be adjusted as the algorithm progresses\n");
+      else printfQuda("\tAuto tune not active: alpha will remain constant as the algorithm progresses\n");      
       printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha0);
       printfQuda("\tTolerance: %e\n", tolerance);
-      printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta");
-      printfQuda("\tMaximum number of iterations: %d\n", Nsteps);
+      printfQuda("\tStop criterion method: %s\n", theta_condition == QUDA_BOOLEAN_TRUE ? "Theta" : "Delta");
+      printfQuda("\tMaximum number of iterations: %d\n", steps);
       printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval);
     }
     
@@ -233,7 +222,7 @@ namespace quda {
 
     double diff = 0.0;
     int iter = 0;
-    for (iter = 0; iter < Nsteps; iter++) {
+    for (iter = 0; iter < steps; iter++) {
       for (int k = 0; k < 6; k++) {
         //------------------------------------------------------------------------
         // Set a pointer do the element k in lattice volume
@@ -298,7 +287,7 @@ namespace quda {
       diff = abs(action0 - action);
       if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_SUMMARIZE)
         printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
-      if ( autotune && ((action - action0) < -1e-14) ) {
+      if ( autotune == QUDA_BOOLEAN_TRUE && ((action - action0) < -1e-14) ) {
         if ( arg.alpha > 0.01 ) {
           arg.alpha = 0.95 * arg.alpha;
           if(getVerbosity() >= QUDA_SUMMARIZE) printf("Changing alpha down -> %.4e\n", arg.alpha);
@@ -307,7 +296,7 @@ namespace quda {
       //------------------------------------------------------------------------
       // Check gauge fix quality criterion
       //------------------------------------------------------------------------
-      if ( stopWtheta ) {   if ( argQ.getTheta() < tolerance ) break; }
+      if ( theta_condition == QUDA_BOOLEAN_TRUE ) {   if ( argQ.getTheta() < tolerance ) break; }
       else { if ( diff < tolerance ) break; }
 
       action0 = action;
@@ -368,21 +357,22 @@ namespace quda {
     
     gflops = (gflops * 1e-9) / (secs);
     gbytes = gbytes / (secs * 1e9);
-    if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Time: %6.6f s, Gflop/s = %6.1f, GB/s = %6.1f\n", secs, gflops, gbytes);
-
+    if (getVerbosity() > QUDA_SUMMARIZE)
+      printfQuda("Time: %6.6f s, Gflop/s = %6.1f, GB/s = %6.1f\n", secs, gflops, gbytes);
+    
     host_free(num_failures_h);
   }
 
   template<typename Float, int nColors, QudaReconstructType recon> struct GaugeFixingFFT {
-    GaugeFixingFFT(GaugeField& data, int gauge_dir, int Nsteps, int verbose_interval,
-                   double alpha, int autotune, double tolerance, int stopWtheta)
+    GaugeFixingFFT(GaugeField& data, int gauge_dir, int steps, int verbose_interval,
+                   double alpha, QudaBoolean autotune, double tolerance, QudaBoolean theta_condition)
     {
       if (gauge_dir != 3) {
 	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs...\n");
-        gaugeFixingFFT<Float, recon, 4>(data, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
+        gaugeFixingFFT<Float, recon, 4>(data, steps, verbose_interval, alpha, autotune, tolerance, theta_condition);
       } else {
 	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs...\n");
-        gaugeFixingFFT<Float, recon, 3>(data, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
+        gaugeFixingFFT<Float, recon, 3>(data, steps, verbose_interval, alpha, autotune, tolerance, theta_condition);
       }
     }
   };
@@ -391,22 +381,22 @@ namespace quda {
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
    * @param[in,out] data, quda gauge field
    * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
+   * @param[in] steps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune (legacy), 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value. We hardcode this to true.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
+   * @param[in] autotune QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we decrease the alpha value.
+   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by steps
+   * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
    */
 #if defined(GPU_GAUGE_ALG)
-  void gaugeFixingFFT(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha,
-                      const int autotune, const double tolerance, const int stopWtheta)
+  void gaugeFixingFFT(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, const double alpha,
+                      const QudaBoolean autotune, const double tolerance, const QudaBoolean theta_condition)
   {
     if (comm_partitioned()) errorQuda("Gauge Fixing with FFTs in multi-GPU support NOT implemented yet!");
-    instantiate<GaugeFixingFFT, ReconstructNo12>(data, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
+    instantiate<GaugeFixingFFT, ReconstructNo12>(data, gauge_dir, steps, verbose_interval, alpha, autotune, tolerance, theta_condition);
   }
 #else
-  void gaugeFixingFFT(GaugeField&, const int, const int, const int, const double, const int, const double, const int)
+  void gaugeFixingFFT(GaugeField&, const int, const int, const int, const double, const QudaBoolean, const double, const QudaBoolean)
   {
     errorQuda("Gauge fixing has bot been built");
   }
diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu
index 93a3521530..e56e5e05c5 100644
--- a/lib/gauge_fix_ovr.cu
+++ b/lib/gauge_fix_ovr.cu
@@ -223,9 +223,9 @@ namespace quda {
   };
 
   template <typename Float, QudaReconstructType recon, int gauge_dir>
-  void gaugeFixingOVR(GaugeField &data,const int Nsteps, const int verbose_interval,
+  void gaugeFixingOVR(GaugeField &data, const int steps, const int verbose_interval,
                       const double relax_boost, const double tolerance,
-                      const int reunit_interval, const int stopWtheta)
+                      const int reunit_interval, const QudaBoolean theta_condition)
   {
     TimeProfile profileInternalGaugeFixOVR("InternalGaugeFixQudaOVR", false);
 
@@ -236,8 +236,8 @@ namespace quda {
     if (getVerbosity() >= QUDA_SUMMARIZE) {
       printfQuda("\tOverrelaxation boost parameter: %e\n", relax_boost);
       printfQuda("\tTolerance: %le\n", tolerance);
-      printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta");
-      printfQuda("\tMaximum number of iterations: %d\n", Nsteps);
+      printfQuda("\tStop criterion method: %s\n", theta_condition == QUDA_BOOLEAN_TRUE ? "Theta" : "Delta");
+      printfQuda("\tMaximum number of iterations: %d\n", steps);
       printfQuda("\tReunitarize at every %d steps\n", reunit_interval);
       printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval);
     }
@@ -324,7 +324,7 @@ namespace quda {
     GaugeFix<Float, recon, gauge_dir> gfixBorderPoints(data, relax_boost, borderpoints, true, threads);
 
     int iter = 0;
-    for (iter = 0; iter < Nsteps; iter++) {
+    for (iter = 0; iter < steps; iter++) {
       for (int p = 0; p < 2; p++) {
         if (comm_partitioned()) {
           gfixBorderPoints.setParity(p); //compute border points
@@ -414,7 +414,7 @@ namespace quda {
       double diff = abs(action0 - action);
       if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_VERBOSE)
         printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff);
-      if (stopWtheta) {
+      if (theta_condition == QUDA_BOOLEAN_TRUE) {
         if (argQ.getTheta() < tolerance) break;
       } else {
         if ( diff < tolerance ) break;
@@ -470,15 +470,15 @@ namespace quda {
   }
 
   template <typename Float, int nColor, QudaReconstructType recon> struct GaugeFixingOVR {
-  GaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval,
-                 const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta)
+  GaugeFixingOVR(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval,
+                 const double relax_boost, const double tolerance, const int reunit_interval, const QudaBoolean theta_condition)
     {
       if (gauge_dir == 4) {
 	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing...\n");
-        gaugeFixingOVR<Float, recon, 4>(data, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta);
+        gaugeFixingOVR<Float, recon, 4>(data, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition);
       } else if (gauge_dir == 3) {
 	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing...\n");
-        gaugeFixingOVR<Float, recon, 3>(data, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta);
+        gaugeFixingOVR<Float, recon, 3>(data, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition);
       } else {
         errorQuda("Unexpected gauge_dir = %d", gauge_dir);
       }
@@ -489,21 +489,21 @@ namespace quda {
    * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
    * @param[in,out] data, quda gauge field
    * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] Nsteps, maximum number of steps to perform gauge fixing
+   * @param[in] steps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps
+   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by steps
    * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value
+   * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
    */
 #ifdef GPU_GAUGE_ALG
-  void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost,
-                      const double tolerance, const int reunit_interval, const int stopWtheta)
+  void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, const double relax_boost,
+                      const double tolerance, const int reunit_interval, const QudaBoolean theta_condition)
   {
-    instantiate<GaugeFixingOVR>(data, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta);
+    instantiate<GaugeFixingOVR>(data, gauge_dir, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition);
   }
 #else
-  void gaugeFixingOVR(GaugeField&, const int, const int, const int, const double, const double, const int, const int)
+  void gaugeFixingOVR(GaugeField&, const int, const int, const int, const double, const double, const int, const QudaBoolean)
   {
     errorQuda("Gauge fixing has not been built");
   }
diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index dc395a1e57..985c985f9e 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -234,8 +234,7 @@ static TimeProfile profileMomAction("momActionQuda");
 static TimeProfile profileEnd("endQuda");
 
 //!< Profiler for GaugeFixing
-static TimeProfile profileGaugeFixFFT("gaugeFixFFTQuda");
-static TimeProfile profileGaugeFixOVR("gaugeFixOVRQuda");
+static TimeProfile profileGaugeFix("gaugeFixQuda");
 
 //!< Profiler for toal time spend between init and end
 static TimeProfile profileInit2End("initQuda-endQuda",false);
@@ -1547,8 +1546,7 @@ void endQuda(void)
     profileProject.Print();
     profilePhase.Print();
     profileMomAction.Print();
-    profileGaugeFixOVR.Print();
-    profileGaugeFixFFT.Print();
+    profileGaugeFix.Print();
     profileEnd.Print();
 
     profileInit2End.Print();
@@ -5555,133 +5553,75 @@ void performWFlownStep(unsigned int n_steps, double step_size, int meas_interval
   popOutputPrefix();
 }
 
-int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps,
-                              const unsigned int verbose_interval, const double relax_boost, const double tolerance,
-                              const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param,
-                              double *timeinfo)
-{
-  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL);
-
-  checkGaugeParam(param);
-
-  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT);
-  GaugeFieldParam gParam(*param, gauge);
-  auto *cpuGauge = new cpuGaugeField(gParam);
-
-  // gParam.pad = getFatLinkPadding(param->X);
-  gParam.create = QUDA_NULL_FIELD_CREATE;
-  gParam.link_type = param->type;
-  gParam.reconstruct = param->reconstruct;
-  gParam.setPrecision(gParam.Precision(), true);
-  auto *cudaInGauge = new cudaGaugeField(gParam);
-
-  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_INIT);
-  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_H2D);
-
-  cudaInGauge->loadCPUField(*cpuGauge);
-
-  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_H2D);
-
-  if (comm_size() == 1) {
-    // perform the update
-    profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
-    gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
-                   stopWtheta);
-    profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE);
-  } else {
-    cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFixOVR);
-
-    // Perform the update
-    profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
-    gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval,
-                   stopWtheta);
-    profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE);
-
-    copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
-  }
-
-  // Copy the gauge field back to the host
-  profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H);
-  cudaInGauge->saveCPUField(*cpuGauge);
-  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_D2H);
-
-  profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_TOTAL);
-
-  if (param->make_resident_gauge) {
-    if (gaugePrecise != nullptr) delete gaugePrecise;
-    gaugePrecise = cudaInGauge;
-  } else {
-    delete cudaInGauge;
-  }
-
-  if(timeinfo){
-    timeinfo[0] = profileGaugeFixOVR.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = profileGaugeFixOVR.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = profileGaugeFixOVR.Last(QUDA_PROFILE_D2H);
-  }
-
-  return 0;
-}
-
-int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir,  const unsigned int Nsteps, \
-  const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \
-  const unsigned int  stopWtheta, QudaGaugeParam* param , double* timeinfo)
-{
-  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_TOTAL);
-
-  checkGaugeParam(param);
-
-  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_INIT);
-
-  GaugeFieldParam gParam(*param, gauge);
-  auto *cpuGauge = new cpuGaugeField(gParam);
-
-  //gParam.pad = getFatLinkPadding(param->X);
-  gParam.create      = QUDA_NULL_FIELD_CREATE;
-  gParam.link_type   = param->type;
-  gParam.reconstruct = param->reconstruct;
-  gParam.setPrecision(gParam.Precision(), true);
-  auto *cudaInGauge = new cudaGaugeField(gParam);
-
-  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT);
+int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixParam *fix_param, double *timeinfo)
+{  
+  profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL);
 
-  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D);
+  // Check parameters
+  checkGaugeParam(g_param);
+  checkGaugeFixParam(fix_param);
 
+  // Create host and device fields
+  profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
+  GaugeFieldParam gauge_param(*g_param, gauge);
+  auto *cpuGauge = new cpuGaugeField(gauge_param);
+  gauge_param.create = QUDA_NULL_FIELD_CREATE;
+  gauge_param.link_type = g_param->type;
+  gauge_param.reconstruct = g_param->reconstruct;
+  gauge_param.setPrecision(gauge_param.Precision(), true);
+  auto *cudaInGauge = new cudaGaugeField(gauge_param);
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
+
+  // Load gauge to device
+  profileGaugeFix.TPSTART(QUDA_PROFILE_H2D);
   cudaInGauge->loadCPUField(*cpuGauge);
-
-  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_H2D);
-
-  // perform the update
-  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
-
-  // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu)
-  // This ensures that the user can not override alpha autotuning. This is done because
-  // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of
-  // alpha, but autotuning alpha ensures optimal behaviour.
-  // Users who wish to change this behaviour may read the comment in
-  // lib/gauge_fix_fft.cu to regain control.
-  gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta);
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D);
   
-  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE);
+  // Perform the update
+  switch(fix_param->fix_type) {
+    
+  case QUDA_GAUGEFIX_TYPE_OVR:
+    if (comm_size() == 1) {
+      profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
+      gaugeFixingOVR(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition);
+      profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
+    } else {
+      // For MPI, we must perform a halo exchange
+      cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFix);
+      profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
+      gaugeFixingOVR(*cudaInGaugeEx, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition);
+      profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
+      copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
+    }
+    break;
+    
+  case QUDA_GAUGEFIX_TYPE_FFT:
+    profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
+    gaugeFixingFFT(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, fix_param->theta_condition);
+    profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
+    break;
+    
+  default:
+    errorQuda("Unkown gauge fix type %d", fix_param->fix_type);
+  }
   
-  // copy the gauge field back to the host
-  profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H);
+  // Copy the fixed gauge field back to the host
+  profileGaugeFix.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
-  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_D2H);
-
-  profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_TOTAL);
-
-  if (param->make_resident_gauge) {
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H);
+  
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL);
+  if (g_param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
     gaugePrecise = cudaInGauge;
   } else {
     delete cudaInGauge;
   }
 
-  if (timeinfo) {
-    timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D);
-    timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE);
-    timeinfo[2] = profileGaugeFixFFT.Last(QUDA_PROFILE_D2H);
+  if(timeinfo){
+    timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D);
+    timeinfo[1] = profileGaugeFix.Last(QUDA_PROFILE_COMPUTE);
+    timeinfo[2] = profileGaugeFix.Last(QUDA_PROFILE_D2H);
   }
 
   return 0;
diff --git a/lib/milc_interface.cpp b/lib/milc_interface.cpp
index 5d7322672f..e752cc0a2f 100644
--- a/lib/milc_interface.cpp
+++ b/lib/milc_interface.cpp
@@ -2698,15 +2698,23 @@ void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int
 void qudaGaugeFixingOVR(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double relax_boost,
                         double tolerance, unsigned int reunit_interval, unsigned int stopWtheta, void *milc_sitelink)
 {
-  QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim,
+  QudaGaugeParam gauge_param = newMILCGaugeParam(localDim,
       (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
       QUDA_SU3_LINKS);
-  qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_NO;
+  gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
   //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
 
+  QudaGaugeFixParam fix_param = newQudaGaugeFixParam();
+  fix_param.gauge_dir = gauge_dir;
+  fix_param.maxiter = Nsteps;
+  fix_param.verbosity_interval = verbose_interval;
+  fix_param.ovr_relaxation_boost = relax_boost;
+  fix_param.tolerance = tolerance;
+  fix_param.reunit_interval = reunit_interval;
+  fix_param.theta_condition = stopWtheta == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE;
+  
   double timeinfo[3];
-  computeGaugeFixingOVRQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta, \
-    &qudaGaugeParam, timeinfo);
+  computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo);
 
   printfQuda("Time H2D: %lf\n", timeinfo[0]);
   printfQuda("Time to Compute: %lf\n", timeinfo[1]);
@@ -2725,17 +2733,24 @@ void qudaGaugeFixingFFT( int precision,
     void* milc_sitelink
     )
 {
-  QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim,
+  QudaGaugeParam gauge_param = newMILCGaugeParam(localDim,
       (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
       QUDA_GENERAL_LINKS);
-  qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_NO;
+  gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
   //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
 
+  QudaGaugeFixParam fix_param = newQudaGaugeFixParam();
+  fix_param.gauge_dir = gauge_dir;
+  fix_param.maxiter = Nsteps;
+  fix_param.verbosity_interval = verbose_interval;
+  fix_param.fft_alpha = alpha;
+  fix_param.tolerance = tolerance;
+  fix_param.theta_condition = stopWtheta == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE;
+  fix_param.fft_autotune = autotune == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE;
 
   double timeinfo[3];
-  computeGaugeFixingFFTQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta, \
-    &qudaGaugeParam, timeinfo);
-
+  computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo);
+  
   printfQuda("Time H2D: %lf\n", timeinfo[0]);
   printfQuda("Time to Compute: %lf\n", timeinfo[1]);
   printfQuda("Time D2H: %lf\n", timeinfo[2]);
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index 4afa0598ce..154f6f0d36 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -66,29 +66,35 @@ int gf_maxiter = 10000;
 int gf_verbosity_interval = 100;
 double gf_ovr_relaxation_boost = 1.5;
 double gf_fft_alpha = 0.8;
+QudaBoolean gf_fft_autotune = QUDA_BOOLEAN_TRUE;
 int gf_reunit_interval = 10;
 double gf_tolerance = 1e-6;
-bool gf_theta_condition = false;
+QudaBoolean gf_theta_condition = QUDA_BOOLEAN_FALSE;
+QudaGaugeFixType fix_type = QUDA_GAUGEFIX_TYPE_OVR;
 
 void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
 {
+  CLI::TransformPairs<QudaGaugeFixType> fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR},
+                                                      {"fft", QUDA_GAUGEFIX_TYPE_FFT}};
+  
   // Option group for gauge fixing related options
   auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests");
   opgroup->add_option("--gf-dir", gf_gauge_dir,
-                      "The orthogonal direction of teh gauge fixing, 3=Coulomb, 4=Landau. (default 4)");
+                      "The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4)");
   opgroup->add_option("--gf-maxiter", gf_maxiter,
-                      "The maximun number of gauge fixing iterations to be applied (default 10000) ");
+                      "The maximun number of gauge fixing iterations to be applied (default 10000)");
   opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval,
                       "Print the gauge fixing progress every N steps (default 100)");
   opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost,
                       "The overrelaxation boost parameter for the overrelaxation method (default 1.5)");
   opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)");
+  opgroup->add_option("--gf-fft-autotune", gf_fft_autotune, "Autotune the Alpha parameter in the FFT method (default true)");
   opgroup->add_option("--gf-reunit-interval", gf_reunit_interval,
                       "Reunitarise the gauge field every N steps (default 10)");
   opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)");
-  opgroup->add_option(
-    "--gf-theta-condition", gf_theta_condition,
-    "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
+  opgroup->add_option("--gf-theta-condition", gf_theta_condition,
+		      "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
+  opgroup->add_option("--gf-fix-type", fix_type, "The type of algorithm to use for fixing (default ovr)")->transform(CLI::QUDACheckedTransformer(fix_type_map));
 }
 
 class GaugeAlgTest : public ::testing::Test {
@@ -147,9 +153,9 @@ class GaugeAlgTest : public ::testing::Test {
   {
     if (execute) {
       setVerbosity(QUDA_VERBOSE);
-      param = newQudaGaugeParam();
-
+      
       // Setup gauge container.
+      param = newQudaGaugeParam();
       setWilsonGaugeParam(param);
       param.t_boundary = QUDA_PERIODIC_T;
 
@@ -298,13 +304,7 @@ class GaugeAlgTest : public ::testing::Test {
     if (execute) {
       if (!checkDimsPartitioned()) {
         printfQuda("%s gauge fixing with steepest descent method with FFT\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
-	// We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu)
-	// This ensures that the user can not override alpha autotuning. This is done because
-	// it is very easy for the FFT gauge fixing to fail with a poorly chosen value of
-	// alpha, but autotuning alpha ensures optimal behaviour.
-	// Users who wish to change this behaviour may read the comment in
-	// lib/gauge_fix_fft.cu to regain control.
-	gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance,
+	gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
                        gf_theta_condition);
 
         auto plaq_gf = plaquette(*U);
@@ -393,13 +393,7 @@ TEST_F(GaugeAlgTest, Landau_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Landau gauge fixing with steepest descent method with FFT\n");
-      // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu)
-      // This ensures that the user can not override alpha autotuning. This is done because
-      // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of
-      // alpha, but autotuning alpha ensures optimal behaviour.
-      // Users who wish to change this behaviour may read the comment in
-      // lib/gauge_fix_fft.cu to regain control.
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance,
+      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
                      gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
@@ -415,13 +409,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n");
-      // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu)
-      // This ensures that the user can not override alpha autotuning. This is done because
-      // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of
-      // alpha, but autotuning alpha ensures optimal behaviour.
-      // Users who wish to change this behaviour may read the comment in
-      // lib/gauge_fix_fft.cu to regain control.
-      gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance,
+      gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
                      gf_theta_condition);
 auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);

From 5ef51f73c478ea6d90e12c6d5621ac9052cbb530 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 29 Oct 2021 17:44:49 -0700
Subject: [PATCH 21/32] Use host_timer_t in gauge_alg_ctest.cpp

---
 tests/gauge_alg_ctest.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index 154f6f0d36..6f52545616 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -102,7 +102,7 @@ class GaugeAlgTest : public ::testing::Test {
 protected:
   QudaGaugeParam param;
 
-  device_timer_t device_timer_1, device_timer_2;
+  host_timer_t host_timer_1, host_timer_2;
   double2 detu;
   double3 plaq;
   cudaGaugeField *U;
@@ -164,7 +164,7 @@ class GaugeAlgTest : public ::testing::Test {
       int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h);
       SetReunitarizationConsts();
 
-      device_timer_1.start();
+      host_timer_1.start();
 
       // If no field is loaded, create a physical quenched field on the device
       if (!gauge_load) {
@@ -187,7 +187,7 @@ class GaugeAlgTest : public ::testing::Test {
         novrsteps = heatbath_num_overrelax_per_step;
         coldstart = heatbath_coldstart;
         beta_value = heatbath_beta_value;
-        device_timer_2.start();
+        host_timer_2.start();
 
         if (coldstart)
           InitGaugeField(*U);
@@ -207,8 +207,8 @@ class GaugeAlgTest : public ::testing::Test {
           printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
         }
 
-        device_timer_2.stop();
-        printfQuda("Time Monte -> %.6f s\n", device_timer_2.last());
+        host_timer_2.stop();
+        printfQuda("Time Monte -> %.6f s\n", host_timer_2.last());
       } else {
 
         // If a field is loaded, create a device field and copy
@@ -276,8 +276,8 @@ class GaugeAlgTest : public ::testing::Test {
       // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
       PGaugeExchangeFree();
 
-      device_timer_1.stop();
-      printfQuda("Time -> %.6f s\n", device_timer_1.last());
+      host_timer_1.stop();
+      printfQuda("Time -> %.6f s\n", host_timer_1.last());
     }
     // If we performed a specific instance, switch off the
     // Google testing.

From 97b4c0b55924d533306ac18d0a98c45ea953418e Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 29 Oct 2021 17:45:32 -0700
Subject: [PATCH 22/32] Clang tidy

---
 include/enum_quda_fortran.h |  1 -
 include/gauge_tools.h       |  9 ++++--
 include/quda.h              | 13 ++++----
 lib/check_params.h          |  1 -
 lib/interface_quda.cpp      | 30 ++++++++++--------
 lib/milc_interface.cpp      | 14 ++++-----
 tests/gauge_alg_ctest.cpp   | 61 +++++++++++++++++++------------------
 tests/su3_test.cpp          |  2 +-
 8 files changed, 69 insertions(+), 62 deletions(-)

diff --git a/include/enum_quda_fortran.h b/include/enum_quda_fortran.h
index 7614e2bbdc..9207e265af 100644
--- a/include/enum_quda_fortran.h
+++ b/include/enum_quda_fortran.h
@@ -503,7 +503,6 @@
 #define QUDA_GAUGEFIX_TYPE_FFT 1
 #define QUDA_GAUGEFIX_TYPE_INVALID QUDA_INVALID_ENUM
 
-
 #define QudaExtLibType integer(4)
 #define QUDA_CUSOLVE_EXTLIB 0
 #define QUDA_EIGEN_EXTLIB 1
diff --git a/include/gauge_tools.h b/include/gauge_tools.h
index 25b1691195..e067f1b7f2 100644
--- a/include/gauge_tools.h
+++ b/include/gauge_tools.h
@@ -124,7 +124,8 @@ namespace quda
    * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
    */
   void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval,
-                      const double relax_boost, const double tolerance, const int reunit_interval, const QudaBoolean theta_condition);
+                      const double relax_boost, const double tolerance, const int reunit_interval,
+                      const QudaBoolean theta_condition);
 
   /**
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
@@ -133,14 +134,16 @@ namespace quda
    * @param[in] steps, maximum number of steps to perform gauge fixing
    * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
    * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune, QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we decrease the alpha value
+   * @param[in] autotune, QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we
+   * decrease the alpha value
    * @param[in] tolerance, torelance value to stop the method, if this
    * value is zero then the method stops when iteration reachs the
    * maximum number of steps defined by steps
    * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
    */
   void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval,
-                      const double alpha, const QudaBoolean autotune, const double tolerance, const QudaBoolean theta_condition);
+                      const double alpha, const QudaBoolean autotune, const double tolerance,
+                      const QudaBoolean theta_condition);
 
   /**
      @brief Compute the Fmunu tensor
diff --git a/include/quda.h b/include/quda.h
index 3401cc4f67..e4c34e0029 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -791,10 +791,10 @@ extern "C" {
     QudaBoolean fft_autotune;    /**< Autotune the Alpha parameter in the FFT method (default true) */
     int reunit_interval;         /**< Reunitarise the gauge field every N steps (default 10) */
     double tolerance;            /**< The tolerance of the gauge fixing quality (default 1e-6) */
-    QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)" */
+    QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the
+                                    delta value (default false)" */
   } QudaGaugeFixParam;
 
-  
   /*
    * Interface functions, found in interface_quda.cpp
    */
@@ -1021,7 +1021,7 @@ extern "C" {
    * @param param The QudaGaugeFixParam whose elements we are to print.
    */
   void printQudaGaugeFixParam(QudaGaugeFixParam *param);
-  
+
   /**
    * Load the gauge field from the host.
    * @param h_gauge Base pointer to host gauge field (regardless of dimensionality)
@@ -1536,14 +1536,15 @@ extern "C" {
                     const int *X);
 
   /**
-   * @brief Gauge fixing with overrelaxation with support for single and multi GPU, and steepest descent FFT with support for single GPU only.
+   * @brief Gauge fixing with overrelaxation with support for single and multi GPU, and steepest descent FFT with
+   * support for single GPU only.
    * @param[in,out] gauge, gauge field to be fixed
    * @param[in] gauge_param The parameters of the external fields and the computation settings
    * @param[in] fix_param Container for the gauge fixing algorithm and parameters to use.
-   * @param[out] timeinfo Array to track timings 
+   * @param[out] timeinfo Array to track timings
    */
   int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *gauge_param, QudaGaugeFixParam *fix_param, double *timeinfo);
-  
+
   /**
    * @brief Strided Batched GEMM
    * @param[in] arrayA The array containing the A matrix data
diff --git a/lib/check_params.h b/lib/check_params.h
index 28e8370fa6..be21e3c8fb 100644
--- a/lib/check_params.h
+++ b/lib/check_params.h
@@ -1092,7 +1092,6 @@ void printQudaGaugeFixParam(QudaGaugeFixParam *param)
 #endif
 }
 
-
 // clean up
 #undef INVALID_INT
 #undef INVALID_DOUBLE
diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 985c985f9e..9829634ac4 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5554,7 +5554,7 @@ void performWFlownStep(unsigned int n_steps, double step_size, int meas_interval
 }
 
 int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixParam *fix_param, double *timeinfo)
-{  
+{
   profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL);
 
   // Check parameters
@@ -5576,40 +5576,44 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   profileGaugeFix.TPSTART(QUDA_PROFILE_H2D);
   cudaInGauge->loadCPUField(*cpuGauge);
   profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D);
-  
+
   // Perform the update
-  switch(fix_param->fix_type) {
-    
+  switch (fix_param->fix_type) {
+
   case QUDA_GAUGEFIX_TYPE_OVR:
     if (comm_size() == 1) {
       profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
-      gaugeFixingOVR(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition);
+      gaugeFixingOVR(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
+                     fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval,
+                     fix_param->theta_condition);
       profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
     } else {
       // For MPI, we must perform a halo exchange
       cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFix);
       profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
-      gaugeFixingOVR(*cudaInGaugeEx, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition);
+      gaugeFixingOVR(*cudaInGaugeEx, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
+                     fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval,
+                     fix_param->theta_condition);
       profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
       copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
     }
     break;
-    
+
   case QUDA_GAUGEFIX_TYPE_FFT:
     profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
-    gaugeFixingFFT(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, fix_param->theta_condition);
+    gaugeFixingFFT(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
+                   fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, fix_param->theta_condition);
     profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
     break;
-    
-  default:
-    errorQuda("Unkown gauge fix type %d", fix_param->fix_type);
+
+  default: errorQuda("Unkown gauge fix type %d", fix_param->fix_type);
   }
-  
+
   // Copy the fixed gauge field back to the host
   profileGaugeFix.TPSTART(QUDA_PROFILE_D2H);
   cudaInGauge->saveCPUField(*cpuGauge);
   profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H);
-  
+
   profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL);
   if (g_param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
diff --git a/lib/milc_interface.cpp b/lib/milc_interface.cpp
index e752cc0a2f..3cac52afcf 100644
--- a/lib/milc_interface.cpp
+++ b/lib/milc_interface.cpp
@@ -2698,9 +2698,8 @@ void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int
 void qudaGaugeFixingOVR(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double relax_boost,
                         double tolerance, unsigned int reunit_interval, unsigned int stopWtheta, void *milc_sitelink)
 {
-  QudaGaugeParam gauge_param = newMILCGaugeParam(localDim,
-      (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
-      QUDA_SU3_LINKS);
+  QudaGaugeParam gauge_param
+    = newMILCGaugeParam(localDim, (precision == 1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_SU3_LINKS);
   gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
   //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
 
@@ -2712,7 +2711,7 @@ void qudaGaugeFixingOVR(int precision, unsigned int gauge_dir, int Nsteps, int v
   fix_param.tolerance = tolerance;
   fix_param.reunit_interval = reunit_interval;
   fix_param.theta_condition = stopWtheta == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE;
-  
+
   double timeinfo[3];
   computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo);
 
@@ -2733,9 +2732,8 @@ void qudaGaugeFixingFFT( int precision,
     void* milc_sitelink
     )
 {
-  QudaGaugeParam gauge_param = newMILCGaugeParam(localDim,
-      (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
-      QUDA_GENERAL_LINKS);
+  QudaGaugeParam gauge_param
+    = newMILCGaugeParam(localDim, (precision == 1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_GENERAL_LINKS);
   gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
   //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
 
@@ -2750,7 +2748,7 @@ void qudaGaugeFixingFFT( int precision,
 
   double timeinfo[3];
   computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo);
-  
+
   printfQuda("Time H2D: %lf\n", timeinfo[0]);
   printfQuda("Time to Compute: %lf\n", timeinfo[1]);
   printfQuda("Time D2H: %lf\n", timeinfo[2]);
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp
index 6f52545616..a1e3c60507 100644
--- a/tests/gauge_alg_ctest.cpp
+++ b/tests/gauge_alg_ctest.cpp
@@ -74,9 +74,8 @@ QudaGaugeFixType fix_type = QUDA_GAUGEFIX_TYPE_OVR;
 
 void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
 {
-  CLI::TransformPairs<QudaGaugeFixType> fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR},
-                                                      {"fft", QUDA_GAUGEFIX_TYPE_FFT}};
-  
+  CLI::TransformPairs<QudaGaugeFixType> fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR}, {"fft", QUDA_GAUGEFIX_TYPE_FFT}};
+
   // Option group for gauge fixing related options
   auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests");
   opgroup->add_option("--gf-dir", gf_gauge_dir,
@@ -88,16 +87,20 @@ void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
   opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost,
                       "The overrelaxation boost parameter for the overrelaxation method (default 1.5)");
   opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)");
-  opgroup->add_option("--gf-fft-autotune", gf_fft_autotune, "Autotune the Alpha parameter in the FFT method (default true)");
+  opgroup->add_option("--gf-fft-autotune", gf_fft_autotune,
+                      "Autotune the Alpha parameter in the FFT method (default true)");
   opgroup->add_option("--gf-reunit-interval", gf_reunit_interval,
                       "Reunitarise the gauge field every N steps (default 10)");
   opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)");
-  opgroup->add_option("--gf-theta-condition", gf_theta_condition,
-		      "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
-  opgroup->add_option("--gf-fix-type", fix_type, "The type of algorithm to use for fixing (default ovr)")->transform(CLI::QUDACheckedTransformer(fix_type_map));
+  opgroup->add_option(
+    "--gf-theta-condition", gf_theta_condition,
+    "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
+  opgroup->add_option("--gf-fix-type", fix_type, "The type of algorithm to use for fixing (default ovr)")
+    ->transform(CLI::QUDACheckedTransformer(fix_type_map));
 }
 
-class GaugeAlgTest : public ::testing::Test {
+class GaugeAlgTest : public ::testing::Test
+{
 
 protected:
   QudaGaugeParam param;
@@ -111,19 +114,17 @@ class GaugeAlgTest : public ::testing::Test {
   int novrsteps;
   bool coldstart;
   double beta_value;
-  RNG * randstates;
-  
-  void SetReunitarizationConsts(){
+  RNG *randstates;
+
+  void SetReunitarizationConsts()
+  {
     const double unitarize_eps = 1e-14;
     const double max_error = 1e-10;
     const int reunit_allow_svd = 1;
-    const int reunit_svd_only  = 0;
+    const int reunit_svd_only = 0;
     const double svd_rel_error = 1e-6;
     const double svd_abs_error = 1e-6;
-    setUnitarizeLinksConstants(unitarize_eps, max_error,
-                               reunit_allow_svd, reunit_svd_only,
-                               svd_rel_error, svd_abs_error);
-
+    setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error);
   }
 
   bool checkDimsPartitioned()
@@ -133,8 +134,9 @@ class GaugeAlgTest : public ::testing::Test {
     return false;
   }
 
-  bool comparePlaquette(double3 a, double3 b){
-    double a0,a1,a2;
+  bool comparePlaquette(double3 a, double3 b)
+  {
+    double a0, a1, a2;
     a0 = std::abs(a.x - b.x);
     a1 = std::abs(a.y - b.y);
     a2 = std::abs(a.z - b.z);
@@ -143,7 +145,8 @@ class GaugeAlgTest : public ::testing::Test {
     return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
   }
 
-  bool CheckDeterminant(double2 detu){
+  bool CheckDeterminant(double2 detu)
+  {
     double prec_val = 5e-8;
     if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2;
     return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val);
@@ -153,7 +156,7 @@ class GaugeAlgTest : public ::testing::Test {
   {
     if (execute) {
       setVerbosity(QUDA_VERBOSE);
-      
+
       // Setup gauge container.
       param = newQudaGaugeParam();
       setWilsonGaugeParam(param);
@@ -236,7 +239,7 @@ class GaugeAlgTest : public ::testing::Test {
           U = new cudaGaugeField(gauge_field_param);
           U->copy(*host);
         }
-	
+
         delete host;
 
         // Reunitarization
@@ -259,7 +262,7 @@ class GaugeAlgTest : public ::testing::Test {
       case 2: run_fft(); break;
       default: errorQuda("Invalid test type %d ", test_type);
       }
-      
+
       host_free(num_failures_h);
     }
   }
@@ -283,11 +286,11 @@ class GaugeAlgTest : public ::testing::Test {
     // Google testing.
     if (test_type != 0) execute = false;
   }
-  
+
   virtual void run_ovr()
   {
     if (execute) {
-      printfQuda("%s gauge fixing with overrelaxation method\n",  gf_gauge_dir == 4 ? "Landau" : "Coulomb");
+      printfQuda("%s gauge fixing with overrelaxation method\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
       gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
                      gf_reunit_interval, gf_theta_condition);
       auto plaq_gf = plaquette(*U);
@@ -304,7 +307,7 @@ class GaugeAlgTest : public ::testing::Test {
     if (execute) {
       if (!checkDimsPartitioned()) {
         printfQuda("%s gauge fixing with steepest descent method with FFT\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
-	gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+        gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
                        gf_theta_condition);
 
         auto plaq_gf = plaquette(*U);
@@ -411,7 +414,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
       printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n");
       gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
                      gf_theta_condition);
-auto plaq_gf = plaquette(*U);
+      auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
       ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
@@ -425,7 +428,7 @@ int main(int argc, char **argv)
   // initalize google test, includes command line options
   ::testing::InitGoogleTest(&argc, argv);
 
-  // command line options  
+  // command line options
   auto app = make_app();
   add_gaugefix_option_group(app);
   add_heatbath_option_group(app);
@@ -488,6 +491,6 @@ int main(int argc, char **argv)
   endQuda();
 
   finalizeComms();
-  
-  return test_rc;  
+
+  return test_rc;
 }
diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp
index 9f166b6ca6..44b12bb528 100644
--- a/tests/su3_test.cpp
+++ b/tests/su3_test.cpp
@@ -94,7 +94,7 @@ void add_su3_option_group(std::shared_ptr<QUDAApp> quda_app)
 
   opgroup->add_option("--su3-wflow-type", wflow_type, "The type of action to use in the wilson flow (default wilson)")
     ->transform(CLI::QUDACheckedTransformer(wflow_type_map));
-  
+
   opgroup->add_option("--su3-measurement-interval", measurement_interval,
                       "Measure the field energy and topological charge every Nth step (default 5) ");
 }

From c7801b1c70130696be18e78c5b6a40dfe41eea49 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Fri, 29 Oct 2021 20:37:19 -0700
Subject: [PATCH 23/32] typo in Doxygen comment

---
 include/quda.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/quda.h b/include/quda.h
index e4c34e0029..7650bf38be 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -784,7 +784,7 @@ extern "C" {
 
     QudaGaugeFixType fix_type;   /**< The aglorithm to use for gauge fixing */
     int gauge_dir;               /**< The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4) */
-    int maxiter;                 /**< The maximun number of gauge fixing iterations to be applied (default 10000) */
+    int maxiter;                 /**< The maximum number of gauge fixing iterations to be applied (default 10000) */
     int verbosity_interval;      /**< Print the gauge fixing progress every N steps (default 100) */
     double ovr_relaxation_boost; /**< The overrelaxation boost parameter for the overrelaxation method (default 1.5) */
     double fft_alpha;            /**< The Alpha parameter in the FFT method (default 0.8) */

From 35ee71b65ccb8d905bc63648bf72c52711c35274 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Mon, 1 Nov 2021 15:22:10 -0700
Subject: [PATCH 24/32] Rename gauge_alg to original

---
 tests/CMakeLists.txt                              | 10 +++++-----
 tests/{gauge_alg_ctest.cpp => gauge_alg_test.cpp} |  0
 2 files changed, 5 insertions(+), 5 deletions(-)
 rename tests/{gauge_alg_ctest.cpp => gauge_alg_test.cpp} (100%)

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 4beabb3d81..dcd7c8fa4b 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -223,10 +223,10 @@ if(QUDA_FORCE_GAUGE)
 endif()
 
 if(QUDA_GAUGE_ALG)
-  add_executable(gauge_alg_ctest gauge_alg_ctest.cpp)
-  target_link_libraries(gauge_alg_ctest ${TEST_LIBS})
-  quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS)
-  install(TARGETS gauge_alg_ctest ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR})
+  add_executable(gauge_alg_test gauge_alg_test.cpp)
+  target_link_libraries(gauge_alg_test ${TEST_LIBS})
+  quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS)
+  install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR})
 
   add_executable(heatbath_test heatbath_test.cpp)
   target_link_libraries(heatbath_test ${TEST_LIBS})
@@ -812,7 +812,7 @@ foreach(prec IN LISTS TEST_PRECS)
 
   if(QUDA_GAUGE_ALG)
     add_test(NAME gauge_alg_${prec}
-             COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg_ctest> ${MPIEXEC_POSTFLAGS}
+             COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg_test> ${MPIEXEC_POSTFLAGS}
                      --dim 4 6 8 10 --prec ${prec}
                      --gtest_output=xml:gauge_arg_test_${prec}.xml)
   endif()
diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_test.cpp
similarity index 100%
rename from tests/gauge_alg_ctest.cpp
rename to tests/gauge_alg_test.cpp

From 46bc1a01cc441458f9fbf018b026a4227d3ed922 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Thu, 16 Dec 2021 14:07:51 -0800
Subject: [PATCH 25/32] Add support for host and device gauge input

---
 lib/check_params.h         |  15 +---
 lib/interface_quda.cpp     |  82 +++++++++++-------
 tests/gauge_alg_test.cpp   | 167 +++++++++++++++++++------------------
 tests/utils/host_utils.cpp |   1 +
 tests/utils/misc.cpp       |  13 +++
 tests/utils/misc.h         |   1 +
 6 files changed, 158 insertions(+), 121 deletions(-)

diff --git a/lib/check_params.h b/lib/check_params.h
index be21e3c8fb..1e09c901be 100644
--- a/lib/check_params.h
+++ b/lib/check_params.h
@@ -1065,17 +1065,6 @@ void printQudaGaugeFixParam(QudaGaugeFixParam *param)
   P(struct_size, (size_t)INVALID_INT);
 #endif
 
-#ifdef INIT_PARAM
-  P(gauge_dir, 4);
-  P(maxiter, 10000);
-  P(verbosity_interval, 100);
-  P(reunit_interval, 10);
-  P(ovr_relaxation_boost, 0.0);
-  P(fft_alpha, 0.0);
-  P(tolerance, 0.0);
-  P(fft_autotune, QUDA_BOOLEAN_FALSE);
-  P(theta_condition, QUDA_BOOLEAN_FALSE);
-#else
   P(gauge_dir, INVALID_INT);
   P(maxiter, INVALID_INT);
   P(verbosity_interval, INVALID_INT);
@@ -1083,10 +1072,12 @@ void printQudaGaugeFixParam(QudaGaugeFixParam *param)
   P(ovr_relaxation_boost, INVALID_DOUBLE);
   P(fft_alpha, INVALID_DOUBLE);
   P(tolerance, INVALID_DOUBLE);
+
+#ifndef CHECK_PARAM
   P(fft_autotune, QUDA_BOOLEAN_FALSE);
   P(theta_condition, QUDA_BOOLEAN_FALSE);
 #endif
-
+  
 #ifdef INIT_PARAM
   return ret;
 #endif
diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 9829634ac4..d8ae3d21e9 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5557,51 +5557,73 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
 {
   profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL);
 
+  if (!initialized) errorQuda("QUDA not initialized");
+  printQudaGaugeParam(g_param);
+  printQudaGaugeFixParam(fix_param);
+  
   // Check parameters
   checkGaugeParam(g_param);
   checkGaugeFixParam(fix_param);
 
+  cudaGaugeField *device_gauge = nullptr;
+  cpuGaugeField *cpu_gauge = nullptr;
+  
   // Create host and device fields
-  profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
-  GaugeFieldParam gauge_param(*g_param, gauge);
-  auto *cpuGauge = new cpuGaugeField(gauge_param);
-  gauge_param.create = QUDA_NULL_FIELD_CREATE;
-  gauge_param.link_type = g_param->type;
-  gauge_param.reconstruct = g_param->reconstruct;
-  gauge_param.setPrecision(gauge_param.Precision(), true);
-  auto *cudaInGauge = new cudaGaugeField(gauge_param);
-  profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
-
-  // Load gauge to device
-  profileGaugeFix.TPSTART(QUDA_PROFILE_H2D);
-  cudaInGauge->loadCPUField(*cpuGauge);
-  profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D);
-
+  if(g_param->location == QUDA_CPU_FIELD_LOCATION) {
+    // The gauge field is on the CPU. We must
+    // create a GPU gauge and transfer. 
+    profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
+    GaugeFieldParam gauge_param(*g_param, gauge);
+    cpu_gauge = new cpuGaugeField(gauge_param);
+    gauge_param.create = QUDA_NULL_FIELD_CREATE;
+    gauge_param.link_type = g_param->type;
+    gauge_param.reconstruct = g_param->reconstruct;
+    gauge_param.setPrecision(gauge_param.Precision(), true);
+    device_gauge = new cudaGaugeField(gauge_param);
+    profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
+
+    // Load gauge to device
+    profileGaugeFix.TPSTART(QUDA_PROFILE_H2D);
+    device_gauge->loadCPUField(*cpu_gauge);
+    profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D);
+  } else {
+    // The gauge field is on the GPU already, so
+    // we can just reference that field.
+    profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
+    GaugeFieldParam gauge_param(*g_param, gauge);
+    gauge_param.create = QUDA_REFERENCE_FIELD_CREATE;
+    gauge_param.link_type = g_param->type;
+    gauge_param.reconstruct = g_param->reconstruct;
+    gauge_param.setPrecision(gauge_param.Precision(), true);
+    device_gauge = new cudaGaugeField(gauge_param);    
+    profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
+  }    
+  
   // Perform the update
   switch (fix_param->fix_type) {
 
   case QUDA_GAUGEFIX_TYPE_OVR:
     if (comm_size() == 1) {
       profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
-      gaugeFixingOVR(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
+      gaugeFixingOVR(*device_gauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
                      fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval,
                      fix_param->theta_condition);
       profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
     } else {
       // For MPI, we must perform a halo exchange
-      cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFix);
+      cudaGaugeField *device_gauge_extended = createExtendedGauge(*device_gauge, R, profileGaugeFix);
       profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
-      gaugeFixingOVR(*cudaInGaugeEx, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
+      gaugeFixingOVR(*device_gauge_extended, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
                      fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval,
                      fix_param->theta_condition);
       profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
-      copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION);
+      copyExtendedGauge(*device_gauge, *device_gauge_extended, QUDA_CUDA_FIELD_LOCATION);
     }
     break;
 
   case QUDA_GAUGEFIX_TYPE_FFT:
     profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
-    gaugeFixingFFT(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
+    gaugeFixingFFT(*device_gauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
                    fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, fix_param->theta_condition);
     profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
     break;
@@ -5609,19 +5631,23 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   default: errorQuda("Unkown gauge fix type %d", fix_param->fix_type);
   }
 
-  // Copy the fixed gauge field back to the host
-  profileGaugeFix.TPSTART(QUDA_PROFILE_D2H);
-  cudaInGauge->saveCPUField(*cpuGauge);
-  profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H);
-
+  // Copy the fixed gauge field back to the host if it came
+  // from the host
+  if(g_param->location == QUDA_CPU_FIELD_LOCATION) {
+    profileGaugeFix.TPSTART(QUDA_PROFILE_D2H);
+    device_gauge->saveCPUField(*cpu_gauge);
+    profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H);
+  }
+  
   profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL);
+
   if (g_param->make_resident_gauge) {
     if (gaugePrecise != nullptr) delete gaugePrecise;
-    gaugePrecise = cudaInGauge;
+    gaugePrecise = device_gauge;
   } else {
-    delete cudaInGauge;
+    delete device_gauge;
   }
-
+  
   if(timeinfo){
     timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D);
     timeinfo[1] = profileGaugeFix.Last(QUDA_PROFILE_COMPUTE);
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index a1e3c60507..a0a068d8a9 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -66,11 +66,11 @@ int gf_maxiter = 10000;
 int gf_verbosity_interval = 100;
 double gf_ovr_relaxation_boost = 1.5;
 double gf_fft_alpha = 0.8;
-QudaBoolean gf_fft_autotune = QUDA_BOOLEAN_TRUE;
+bool gf_fft_autotune = true;
 int gf_reunit_interval = 10;
 double gf_tolerance = 1e-6;
-QudaBoolean gf_theta_condition = QUDA_BOOLEAN_FALSE;
-QudaGaugeFixType fix_type = QUDA_GAUGEFIX_TYPE_OVR;
+bool gf_theta_condition = false;
+QudaGaugeFixType gf_fix_type = QUDA_GAUGEFIX_TYPE_OVR;
 
 void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
 {
@@ -95,15 +95,29 @@ void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
   opgroup->add_option(
     "--gf-theta-condition", gf_theta_condition,
     "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)");
-  opgroup->add_option("--gf-fix-type", fix_type, "The type of algorithm to use for fixing (default ovr)")
+  opgroup->add_option("--gf-fix-type", gf_fix_type, "The type of algorithm to use for fixing (default ovr)")
     ->transform(CLI::QUDACheckedTransformer(fix_type_map));
 }
 
+void setGaugeFixParam(QudaGaugeFixParam &fix_param) {
+  fix_param.fix_type = gf_fix_type;
+  fix_param.gauge_dir = gf_gauge_dir;
+  fix_param.maxiter = gf_maxiter;
+  fix_param.verbosity_interval = gf_verbosity_interval;
+  fix_param.reunit_interval = gf_reunit_interval;
+  fix_param.tolerance = gf_tolerance;
+  fix_param.ovr_relaxation_boost = gf_ovr_relaxation_boost;
+  fix_param.fft_alpha = gf_fft_alpha;
+  fix_param.fft_autotune = gf_fft_autotune ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; // from --gf-fft-autotune, not alpha
+  fix_param.theta_condition = gf_theta_condition ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
+}
+
 class GaugeAlgTest : public ::testing::Test
 {
 
 protected:
-  QudaGaugeParam param;
+  QudaGaugeParam gauge_param;
+  QudaGaugeFixParam fix_param;
 
   host_timer_t host_timer_1, host_timer_2;
   double2 detu;
@@ -155,12 +169,11 @@ class GaugeAlgTest : public ::testing::Test
   virtual void SetUp()
   {
     if (execute) {
-      setVerbosity(QUDA_VERBOSE);
-
+      
       // Setup gauge container.
-      param = newQudaGaugeParam();
-      setWilsonGaugeParam(param);
-      param.t_boundary = QUDA_PERIODIC_T;
+      gauge_param = newQudaGaugeParam();
+      setWilsonGaugeParam(gauge_param);
+      gauge_param.t_boundary = QUDA_PERIODIC_T;
 
       // Reunitarization setup
       int *num_failures_h = (int *)mapped_malloc(sizeof(int));
@@ -171,17 +184,17 @@ class GaugeAlgTest : public ::testing::Test
 
       // If no field is loaded, create a physical quenched field on the device
       if (!gauge_load) {
-        GaugeFieldParam gParam(param);
-        gParam.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED;
-        gParam.create = QUDA_NULL_FIELD_CREATE;
-        gParam.reconstruct = link_recon;
-        gParam.setPrecision(prec, true);
+	GaugeFieldParam device_gauge_param(gauge_param);
+        device_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED;
+        device_gauge_param.create = QUDA_NULL_FIELD_CREATE;
+        device_gauge_param.reconstruct = link_recon;
+        device_gauge_param.setPrecision(prec, true);
         for (int d = 0; d < 4; d++) {
-          if (comm_dim_partitioned(d)) gParam.r[d] = 2;
-          gParam.x[d] += 2 * gParam.r[d];
+          if (comm_dim_partitioned(d)) device_gauge_param.r[d] = 2;
+          device_gauge_param.x[d] += 2 * device_gauge_param.r[d];
         }
 
-        U = new cudaGaugeField(gParam);
+        U = new cudaGaugeField(device_gauge_param);
 
         RNG randstates(*U, 1234);
 
@@ -191,12 +204,10 @@ class GaugeAlgTest : public ::testing::Test
         coldstart = heatbath_coldstart;
         beta_value = heatbath_beta_value;
         host_timer_2.start();
-
-        if (coldstart)
-          InitGaugeField(*U);
-        else
-          InitGaugeField(*U, randstates);
-
+	
+        if (coldstart) InitGaugeField(*U);
+        else InitGaugeField(*U, randstates);
+	
         for (int step = 1; step <= nsteps; ++step) {
           printfQuda("Step %d\n", step);
           Monte(*U, randstates, beta_value, nhbsteps, novrsteps);
@@ -216,27 +227,27 @@ class GaugeAlgTest : public ::testing::Test
 
         // If a field is loaded, create a device field and copy
         printfQuda("Copying gauge field from host\n");
-        param.location = QUDA_CPU_FIELD_LOCATION;
-        GaugeFieldParam gauge_field_param(param, host_gauge);
-        gauge_field_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-        GaugeField *host = GaugeField::Create(gauge_field_param);
+        gauge_param.location = QUDA_CPU_FIELD_LOCATION;
+        GaugeFieldParam host_gauge_param(gauge_param, host_gauge);
+        host_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+        GaugeField *host = GaugeField::Create(host_gauge_param);
 
         // switch the parameters for creating the mirror precise cuda gauge field
-        gauge_field_param.create = QUDA_NULL_FIELD_CREATE;
-        gauge_field_param.reconstruct = param.reconstruct;
-        gauge_field_param.setPrecision(param.cuda_prec, true);
+        host_gauge_param.create = QUDA_NULL_FIELD_CREATE;
+        host_gauge_param.reconstruct = gauge_param.reconstruct;
+        host_gauge_param.setPrecision(gauge_param.cuda_prec, true);
 
         if (comm_partitioned()) {
           int R[4] = {0, 0, 0, 0};
           for (int d = 0; d < 4; d++)
             if (comm_dim_partitioned(d)) R[d] = 2;
           static TimeProfile GaugeFix("GaugeFix");
-          cudaGaugeField *tmp = new cudaGaugeField(gauge_field_param);
+          cudaGaugeField *tmp = new cudaGaugeField(host_gauge_param);
           tmp->copy(*host);
           U = createExtendedGauge(*tmp, R, GaugeFix);
           delete tmp;
         } else {
-          U = new cudaGaugeField(gauge_field_param);
+          U = new cudaGaugeField(host_gauge_param);
           U->copy(*host);
         }
 
@@ -258,8 +269,8 @@ class GaugeAlgTest : public ::testing::Test
       case 0:
         // Do the Google testing
         break;
-      case 1: run_ovr(); break;
-      case 2: run_fft(); break;
+      case 1: run(); break;
+	//case 2: run_fft(); break;
       default: errorQuda("Invalid test type %d ", test_type);
       }
 
@@ -287,12 +298,24 @@ class GaugeAlgTest : public ::testing::Test
     if (test_type != 0) execute = false;
   }
 
-  virtual void run_ovr()
+  virtual void run()
   {
     if (execute) {
-      printfQuda("%s gauge fixing with overrelaxation method\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
-      gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance,
-                     gf_reunit_interval, gf_theta_condition);
+      // Set gauge fixing params from the command line
+      fix_param = newQudaGaugeFixParam();
+      setGaugeFixParam(fix_param);
+      
+      // Setup gauge container.
+      gauge_param = newQudaGaugeParam();
+      setWilsonGaugeParam(gauge_param);
+      gauge_param.t_boundary = QUDA_PERIODIC_T;
+      gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
+      
+      //GaugeFieldParam param(*U);
+      printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", get_gaugefix_str(fix_param.fix_type));
+      
+      computeGaugeFixingQuda(U->Gauge_p(), &gauge_param, &fix_param, nullptr);
+      
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
@@ -302,46 +325,26 @@ class GaugeAlgTest : public ::testing::Test
       if (gauge_store) save_gauge();
     }
   }
-  virtual void run_fft()
-  {
-    if (execute) {
-      if (!checkDimsPartitioned()) {
-        printfQuda("%s gauge fixing with steepest descent method with FFT\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb");
-        gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                       gf_theta_condition);
-
-        auto plaq_gf = plaquette(*U);
-        printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-        printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-        ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-        saveTuneCache();
-        // Save if output string is specified
-        if (gauge_store) save_gauge();
-      } else {
-        errorQuda("Cannot perform FFT gauge fixing with MPI partitions.");
-      }
-    }
-  }
-
+  
   virtual void save_gauge()
   {
     printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
 
-    QudaGaugeParam gauge_param = newQudaGaugeParam();
-    setWilsonGaugeParam(gauge_param);
+    //QudaGaugeParam gauge_param = newQudaGaugeParam();
+    //setWilsonGaugeParam(gauge_param);
 
     void *cpu_gauge[4];
     for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); }
 
-    GaugeFieldParam gParam(param);
-    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
-    gParam.create = QUDA_NULL_FIELD_CREATE;
-    gParam.link_type = param.type;
-    gParam.reconstruct = param.reconstruct;
-    gParam.setPrecision(gParam.Precision(), true);
+    GaugeFieldParam param(gauge_param);
+    param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+    param.create = QUDA_NULL_FIELD_CREATE;
+    param.link_type = gauge_param.type;
+    param.reconstruct = gauge_param.reconstruct;
+    param.setPrecision(param.Precision(), true);
 
     cudaGaugeField *gauge;
-    gauge = new cudaGaugeField(gParam);
+    gauge = new cudaGaugeField(param);
 
     // copy into regular field
     copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
@@ -367,8 +370,8 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
   if (execute) {
     printfQuda("Landau gauge fixing with overrelaxation\n");
-    gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                   gf_theta_condition);
+    //gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+    //gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
     printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
@@ -381,8 +384,8 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
   if (execute) {
     printfQuda("Coulomb gauge fixing with overrelaxation\n");
-    gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-                   gf_theta_condition);
+    //gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
+    //gf_theta_condition);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
     printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
@@ -396,8 +399,8 @@ TEST_F(GaugeAlgTest, Landau_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Landau gauge fixing with steepest descent method with FFT\n");
-      gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                     gf_theta_condition);
+      //gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+      //gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
@@ -412,8 +415,8 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n");
-      gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-                     gf_theta_condition);
+      //gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
+      //gf_theta_condition);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
@@ -445,14 +448,16 @@ int main(int argc, char **argv)
 
   // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
-
+  
   QudaGaugeParam gauge_param = newQudaGaugeParam();
-  if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
-  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
-
+  setVerbosity(QUDA_VERBOSE);
+  setQudaPrecisions();
   setWilsonGaugeParam(gauge_param);
   setDims(gauge_param.X);
 
+  //if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
+  //if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
+
   display_test_info();
 
   gauge_load = strcmp(latfile, "");
diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp
index c07d0bcacb..d556db0037 100644
--- a/tests/utils/host_utils.cpp
+++ b/tests/utils/host_utils.cpp
@@ -69,6 +69,7 @@ void setQudaPrecisions()
   if (prec_eigensolver == QUDA_INVALID_PRECISION) prec_eigensolver = prec_sloppy;
   if (prec_precondition == QUDA_INVALID_PRECISION) prec_precondition = prec_sloppy;
   if (prec_null == QUDA_INVALID_PRECISION) prec_null = prec_precondition;
+  if (prec_refinement_sloppy == QUDA_INVALID_PRECISION) prec_refinement_sloppy = prec_precondition;
   if (smoother_halo_prec == QUDA_INVALID_PRECISION) smoother_halo_prec = prec_null;
   if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
   if (link_recon_precondition == QUDA_RECONSTRUCT_INVALID) link_recon_precondition = link_recon_sloppy;
diff --git a/tests/utils/misc.cpp b/tests/utils/misc.cpp
index 58cde21a8f..e8fbd5e5d8 100644
--- a/tests/utils/misc.cpp
+++ b/tests/utils/misc.cpp
@@ -151,6 +151,19 @@ const char *get_contract_str(QudaContractType type)
   return ret;
 }
 
+const char *get_gaugefix_str(QudaGaugeFixType type)
+{
+  const char *ret;
+
+  switch (type) {
+  case QUDA_GAUGEFIX_TYPE_OVR: ret = "Overrelaxation"; break;
+  case QUDA_GAUGEFIX_TYPE_FFT: ret = "FFT"; break;
+  default: ret = "unknown"; break;
+  }
+  
+  return ret;
+}
+
 const char *get_eig_spectrum_str(QudaEigSpectrumType type)
 {
   const char *ret;
diff --git a/tests/utils/misc.h b/tests/utils/misc.h
index 5c35480840..bc4c8a2e4b 100644
--- a/tests/utils/misc.h
+++ b/tests/utils/misc.h
@@ -21,6 +21,7 @@ const char *get_eig_type_str(QudaEigType type);
 const char *get_ritz_location_str(QudaFieldLocation type);
 const char *get_memory_type_str(QudaMemoryType type);
 const char *get_contract_str(QudaContractType type);
+const char *get_gaugefix_str(QudaGaugeFixType type);
 
 #define XUP 0
 #define YUP 1

From 69752679f16e729ed80053c468f914fc258559ee Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Thu, 16 Dec 2021 17:45:02 -0800
Subject: [PATCH 26/32] Use gauge fix param for GPU fix function. Need to fix
 interface for CPU/MILC

---
 include/gauge_tools.h    | 31 +++-----------
 include/quda.h           |  2 +-
 lib/check_params.h       |  2 +-
 lib/gauge_fix_fft.cu     | 42 +++++++++----------
 lib/gauge_fix_ovr.cu     | 43 +++++++++----------
 lib/interface_quda.cpp   | 63 ++++++++++------------------
 tests/gauge_alg_test.cpp | 91 +++++++++++++++++++++++-----------------
 7 files changed, 123 insertions(+), 151 deletions(-)

diff --git a/include/gauge_tools.h b/include/gauge_tools.h
index e067f1b7f2..b53134ecfc 100644
--- a/include/gauge_tools.h
+++ b/include/gauge_tools.h
@@ -113,37 +113,16 @@ namespace quda
   /**
    * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
    * @param[in,out] data, quda gauge field
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] steps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this
-   * value is zero then the method stops when iteration reachs the
-   * maximum number of steps defined by steps
-   * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
+   * @param[in] fix_param Parameter struct that defines the gauge fixing
    */
-  void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval,
-                      const double relax_boost, const double tolerance, const int reunit_interval,
-                      const QudaBoolean theta_condition);
-
+  void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param);
+  
   /**
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
    * @param[in,out] data, quda gauge field
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] steps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune, QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we
-   * decrease the alpha value
-   * @param[in] tolerance, torelance value to stop the method, if this
-   * value is zero then the method stops when iteration reachs the
-   * maximum number of steps defined by steps
-   * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
+   * @param[in] fix_param Parameter struct that defines the gauge fixing
    */
-  void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval,
-                      const double alpha, const QudaBoolean autotune, const double tolerance,
-                      const QudaBoolean theta_condition);
+  void gaugeFixingFFT(GaugeField &data, QudaGaugeFixParam &fix_param);
 
   /**
      @brief Compute the Fmunu tensor
diff --git a/include/quda.h b/include/quda.h
index 0b7d633ec9..02b8601809 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -777,7 +777,7 @@ extern "C" {
     QudaBLASDataType data_type;   /**< Specifies if using S(C) or D(Z) BLAS type */
     QudaBLASDataOrder data_order; /**< Specifies if using Row or Column major */
   } QudaBLASParam;
-
+    
   typedef struct QudaGaugeFixParam_s {
     size_t struct_size; /**< Size of this struct in bytes.  Used to ensure that the host application and QUDA see the same struct size */
 
diff --git a/lib/check_params.h b/lib/check_params.h
index 0dbb87d96d..0f2982f8eb 100644
--- a/lib/check_params.h
+++ b/lib/check_params.h
@@ -268,7 +268,7 @@ void printQudaCloverParam(QudaInvertParam *param)
 
 #if defined CHECK_PARAM
   if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param))
-    errorQuda("Unexpected QudaInvertParam struct size %lu, expected %lu", param->struct_size, sizeof(*param));
+    errorQuda("Unexpected QudaCloverParam struct size %lu, expected %lu", param->struct_size, sizeof(*param));
 #else
   P(struct_size, (size_t)INVALID_INT);
 #endif
diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu
index 74e0849d24..b9aec49975 100644
--- a/lib/gauge_fix_fft.cu
+++ b/lib/gauge_fix_fft.cu
@@ -182,11 +182,17 @@ namespace quda {
   };
 
   template <typename Float, QudaReconstructType recon, int gauge_dir>
-  void gaugeFixingFFT(GaugeField& data, int steps, int verbose_interval,
-                      double alpha0, QudaBoolean autotune, double tolerance, QudaBoolean theta_condition)
+  void gaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param)
   {
     TimeProfile profileInternalGaugeFixFFT("InternalGaugeFixQudaFFT", false);
-
+    
+    QudaBoolean autotune = fix_param.fft_autotune;
+    double alpha0 = fix_param.fft_alpha;
+    double tolerance = fix_param.tolerance;
+    QudaBoolean theta_condition = fix_param.theta_condition;
+    int steps = fix_param.maxiter;
+    int verbose_interval = fix_param.verbosity_interval;
+    
     profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE);
 
     if (getVerbosity() >= QUDA_SUMMARIZE) {
@@ -364,15 +370,16 @@ namespace quda {
   }
 
   template<typename Float, int nColors, QudaReconstructType recon> struct GaugeFixingFFT {
-    GaugeFixingFFT(GaugeField& data, int gauge_dir, int steps, int verbose_interval,
-                   double alpha, QudaBoolean autotune, double tolerance, QudaBoolean theta_condition)
+    GaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param)
     {
-      if (gauge_dir != 3) {
-	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs...\n");
-        gaugeFixingFFT<Float, recon, 4>(data, steps, verbose_interval, alpha, autotune, tolerance, theta_condition);
+      if (fix_param.gauge_dir == 4) {
+	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs\n");
+        gaugeFixingFFT<Float, recon, 4>(data, fix_param);
+      } else if (fix_param.gauge_dir == 3) {
+	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs\n");
+        gaugeFixingFFT<Float, recon, 3>(data, fix_param);	
       } else {
-	if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs...\n");
-        gaugeFixingFFT<Float, recon, 3>(data, steps, verbose_interval, alpha, autotune, tolerance, theta_condition);
+	errorQuda("Unexpected gauge_dir = %d", fix_param.gauge_dir); 
       }
     }
   };
@@ -380,23 +387,16 @@ namespace quda {
   /**
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
    * @param[in,out] data, quda gauge field
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] steps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08
-   * @param[in] autotune QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we decrease the alpha value.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by steps
-   * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
+   * @param[in] fix_param Parameter struct defining the gauge fixing
    */
 #if defined(GPU_GAUGE_ALG)
-  void gaugeFixingFFT(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, const double alpha,
-                      const QudaBoolean autotune, const double tolerance, const QudaBoolean theta_condition)
+  void gaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param)
   {
     if (comm_partitioned()) errorQuda("Gauge Fixing with FFTs in multi-GPU support NOT implemented yet!");
-    instantiate<GaugeFixingFFT, ReconstructNo12>(data, gauge_dir, steps, verbose_interval, alpha, autotune, tolerance, theta_condition);
+    instantiate<GaugeFixingFFT, ReconstructNo12>(data, fix_param);
   }
 #else
-  void gaugeFixingFFT(GaugeField&, const int, const int, const int, const double, const QudaBoolean, const double, const QudaBoolean)
+  void gaugeFixingFFT(GaugeField&, QudaGaugeFixParam &)
   {
     errorQuda("Gauge fixing has bot been built");
   }
diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu
index e56e5e05c5..aff0df2fef 100644
--- a/lib/gauge_fix_ovr.cu
+++ b/lib/gauge_fix_ovr.cu
@@ -223,12 +223,17 @@ namespace quda {
   };
 
   template <typename Float, QudaReconstructType recon, int gauge_dir>
-  void gaugeFixingOVR(GaugeField &data, const int steps, const int verbose_interval,
-                      const double relax_boost, const double tolerance,
-                      const int reunit_interval, const QudaBoolean theta_condition)
+  void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param)
   {
     TimeProfile profileInternalGaugeFixOVR("InternalGaugeFixQudaOVR", false);
 
+    double relax_boost = fix_param.ovr_relaxation_boost;
+    double tolerance = fix_param.tolerance;
+    QudaBoolean theta_condition = fix_param.theta_condition;
+    int steps = fix_param.maxiter;
+    int reunit_interval = fix_param.reunit_interval;
+    int verbose_interval = fix_param.verbosity_interval;
+    
     profileInternalGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE);
     double flop = 0;
     double byte = 0;
@@ -470,17 +475,16 @@ namespace quda {
   }
 
   template <typename Float, int nColor, QudaReconstructType recon> struct GaugeFixingOVR {
-  GaugeFixingOVR(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval,
-                 const double relax_boost, const double tolerance, const int reunit_interval, const QudaBoolean theta_condition)
+  GaugeFixingOVR(GaugeField& data, QudaGaugeFixParam &fix_param)
     {
-      if (gauge_dir == 4) {
-	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing...\n");
-        gaugeFixingOVR<Float, recon, 4>(data, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition);
-      } else if (gauge_dir == 3) {
-	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing...\n");
-        gaugeFixingOVR<Float, recon, 3>(data, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition);
+      if (fix_param.gauge_dir == 4) {
+	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with Overrelaxation\n");
+        gaugeFixingOVR<Float, recon, 4>(data, fix_param);
+      } else if (fix_param.gauge_dir == 3) {
+	if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with Overrelaxation\n");
+        gaugeFixingOVR<Float, recon, 3>(data, fix_param);
       } else {
-        errorQuda("Unexpected gauge_dir = %d", gauge_dir);
+        errorQuda("Unexpected gauge_dir = %d", fix_param.gauge_dir);
       }
     }
   };
@@ -488,22 +492,15 @@ namespace quda {
   /**
    * @brief Gauge fixing with overrelaxation with support for single and multi GPU.
    * @param[in,out] data, quda gauge field
-   * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing
-   * @param[in] steps, maximum number of steps to perform gauge fixing
-   * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this
-   * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
-   * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by steps
-   * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this
-   * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value
+   * @param[in] fix_param Parameter struct defining the gauge fixing
    */
 #ifdef GPU_GAUGE_ALG
-  void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, const double relax_boost,
-                      const double tolerance, const int reunit_interval, const QudaBoolean theta_condition)
+  void gaugeFixingOVR(GaugeField& data, QudaGaugeFixParam &fix_param)
   {
-    instantiate<GaugeFixingOVR>(data, gauge_dir, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition);
+    instantiate<GaugeFixingOVR>(data, fix_param);
   }
 #else
-  void gaugeFixingOVR(GaugeField&, const int, const int, const int, const double, const double, const int, const QudaBoolean)
+  void gaugeFixingOVR(GaugeField&, QudaGaugeFixParam &)
   {
     errorQuda("Gauge fixing has not been built");
   }
diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index dce0f6ef06..355a29cf89 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5531,39 +5531,25 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   checkGaugeParam(g_param);
   checkGaugeFixParam(fix_param);
 
-  cudaGaugeField *device_gauge = nullptr;
-  cpuGaugeField *cpu_gauge = nullptr;
+  profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
+  GaugeFieldParam gauge_param(*g_param, gauge);
+  //printfQuda("CPU start\n");
+  //auto *cpu_gauge = new cpuGaugeField(gauge_param);
+  //printfQuda("CPU done\n");
+  gauge_param.create = QUDA_REFERENCE_FIELD_CREATE;
+  gauge_param.link_type = g_param->type;
+  gauge_param.reconstruct = g_param->reconstruct;
+  gauge_param.setPrecision(gauge_param.Precision(), true);
+  auto *device_gauge = new cudaGaugeField(gauge_param);
+  printfQuda("GPU done\n");
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
   
-  // Create host and device fields
-  if(g_param->location == QUDA_CPU_FIELD_LOCATION) {
-    // The gauge field is on the CPU. We must
-    // create a GPU gauge and transfer. 
-    profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
-    GaugeFieldParam gauge_param(*g_param, gauge);
-    cpu_gauge = new cpuGaugeField(gauge_param);
-    gauge_param.create = QUDA_NULL_FIELD_CREATE;
-    gauge_param.link_type = g_param->type;
-    gauge_param.reconstruct = g_param->reconstruct;
-    gauge_param.setPrecision(gauge_param.Precision(), true);
-    device_gauge = new cudaGaugeField(gauge_param);
-    profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
-
-    // Load gauge to device
-    profileGaugeFix.TPSTART(QUDA_PROFILE_H2D);
-    device_gauge->loadCPUField(*cpu_gauge);
-    profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D);
-  } else {
-    // The gauge field is on the GPU already, so
-    // we can just reference that field.
-    profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
-    GaugeFieldParam gauge_param(*g_param, gauge);
-    gauge_param.create = QUDA_REFERENCE_FIELD_CREATE;
-    gauge_param.link_type = g_param->type;
-    gauge_param.reconstruct = g_param->reconstruct;
-    gauge_param.setPrecision(gauge_param.Precision(), true);
-    device_gauge = new cudaGaugeField(gauge_param);    
-    profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
-  }    
+  // Load gauge to device
+  profileGaugeFix.TPSTART(QUDA_PROFILE_H2D);
+  //device_gauge->loadCPUField(*cpu_gauge);
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D);
+
+  printfQuda("Perform update\n");
   
   // Perform the update
   switch (fix_param->fix_type) {
@@ -5571,17 +5557,13 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   case QUDA_GAUGEFIX_TYPE_OVR:
     if (comm_size() == 1) {
       profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
-      gaugeFixingOVR(*device_gauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
-                     fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval,
-                     fix_param->theta_condition);
+      gaugeFixingOVR(*device_gauge, *fix_param);
       profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
     } else {
       // For MPI, we must perform a halo exchange
       cudaGaugeField *device_gauge_extended = createExtendedGauge(*device_gauge, R, profileGaugeFix);
       profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
-      gaugeFixingOVR(*device_gauge_extended, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
-                     fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval,
-                     fix_param->theta_condition);
+      gaugeFixingOVR(*device_gauge_extended, *fix_param);
       profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
       copyExtendedGauge(*device_gauge, *device_gauge_extended, QUDA_CUDA_FIELD_LOCATION);
     }
@@ -5589,8 +5571,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
 
   case QUDA_GAUGEFIX_TYPE_FFT:
     profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE);
-    gaugeFixingFFT(*device_gauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval,
-                   fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, fix_param->theta_condition);
+    gaugeFixingFFT(*device_gauge, *fix_param);
     profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE);
     break;
 
@@ -5601,7 +5582,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   // from the host
   if(g_param->location == QUDA_CPU_FIELD_LOCATION) {
     profileGaugeFix.TPSTART(QUDA_PROFILE_D2H);
-    device_gauge->saveCPUField(*cpu_gauge);
+    //device_gauge->saveCPUField(*cpu_gauge);
     profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H);
   }
   
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index a0a068d8a9..2f467f7b0d 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -11,7 +11,6 @@
 #include <command_line_params.h>
 #include <misc.h>
 #include <timer.h>
-
 #include <gauge_tools.h>
 #include <tune_quda.h>
 
@@ -39,17 +38,28 @@ bool gauge_load;
 bool gauge_store;
 void *host_gauge[4];
 
+// Define the command line options and option group for this test
+int gf_gauge_dir = 4;
+int gf_maxiter = 10000;
+int gf_verbosity_interval = 100;
+double gf_ovr_relaxation_boost = 1.5;
+double gf_fft_alpha = 0.8;
+bool gf_fft_autotune = true;
+int gf_reunit_interval = 10;
+double gf_tolerance = 1e-6;
+bool gf_theta_condition = false;
+QudaGaugeFixType gf_fix_type = QUDA_GAUGEFIX_TYPE_OVR;
+
 void display_test_info()
 {
   printfQuda("running the following test:\n");
 
   switch (test_type) {
   case 0: printfQuda("\n Google testing\n"); break;
-  case 1: printfQuda("\nOVR gauge fix\n"); break;
-  case 2: printfQuda("\nFFT gauge fix\n"); break;
+  case 1: printfQuda("\n%s %s gauge fix\n", get_gaugefix_str(gf_fix_type), gf_gauge_dir == 4 ? "Landau" : "Coulomb"); break;
   default: errorQuda("Undefined test type %d given", test_type);
   }
-
+  
   printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
   printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
              get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
@@ -60,18 +70,6 @@ void display_test_info()
              dimPartitioned(3));
 }
 
-// Define the command line options and option group for this test
-int gf_gauge_dir = 4;
-int gf_maxiter = 10000;
-int gf_verbosity_interval = 100;
-double gf_ovr_relaxation_boost = 1.5;
-double gf_fft_alpha = 0.8;
-bool gf_fft_autotune = true;
-int gf_reunit_interval = 10;
-double gf_tolerance = 1e-6;
-bool gf_theta_condition = false;
-QudaGaugeFixType gf_fix_type = QUDA_GAUGEFIX_TYPE_OVR;
-
 void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
 {
   CLI::TransformPairs<QudaGaugeFixType> fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR}, {"fft", QUDA_GAUGEFIX_TYPE_FFT}};
@@ -370,8 +368,14 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
   if (execute) {
     printfQuda("Landau gauge fixing with overrelaxation\n");
-    //gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-    //gf_theta_condition);
+    // Set gauge fixing params from the command line
+    // and adjust for this test type
+    fix_param = newQudaGaugeFixParam();
+    setGaugeFixParam(fix_param);
+    fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
+    fix_param.gauge_dir = 4;
+    
+    gaugeFixingOVR(*U, fix_param);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
     printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
@@ -384,8 +388,14 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
   if (execute) {
     printfQuda("Coulomb gauge fixing with overrelaxation\n");
-    //gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval,
-    //gf_theta_condition);
+    // Use gauge fixing params from the command line
+    // and adjust for this test type
+    fix_param = newQudaGaugeFixParam();
+    setGaugeFixParam(fix_param);
+    fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
+    fix_param.gauge_dir = 3;
+
+    gaugeFixingOVR(*U, fix_param);
     auto plaq_gf = plaquette(*U);
     printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
     printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
@@ -399,8 +409,14 @@ TEST_F(GaugeAlgTest, Landau_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Landau gauge fixing with steepest descent method with FFT\n");
-      //gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-      //gf_theta_condition);
+      // Set gauge fixing params from the command line
+      // and adjust for this test type
+      fix_param = newQudaGaugeFixParam();
+      setGaugeFixParam(fix_param);
+      fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT;
+      fix_param.gauge_dir = 4;
+    
+      gaugeFixingFFT(*U, fix_param);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
@@ -415,8 +431,14 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n");
-      //gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance,
-      //gf_theta_condition);
+      // Set gauge fixing params from the command line
+      // and adjust for this test type
+      fix_param = newQudaGaugeFixParam();
+      setGaugeFixParam(fix_param);
+      fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT;
+      fix_param.gauge_dir = 3;
+
+      gaugeFixingFFT(*U, fix_param);
       auto plaq_gf = plaquette(*U);
       printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
@@ -439,7 +461,6 @@ int main(int argc, char **argv)
   test_type = 0;
   CLI::TransformPairs<int> test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}};
   app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
-
   try {
     app->parse(argc, argv);
   } catch (const CLI::ParseError &e) {
@@ -448,23 +469,23 @@ int main(int argc, char **argv)
 
   // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
-  
   QudaGaugeParam gauge_param = newQudaGaugeParam();
   setVerbosity(QUDA_VERBOSE);
   setQudaPrecisions();
   setWilsonGaugeParam(gauge_param);
   setDims(gauge_param.X);
-
-  //if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec;
-  //if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon;
-
+  // call srand() with a rank-dependent seed
+  initRand();
+  // initialize the QUDA library
+  initQuda(device_ordinal);
+  
   display_test_info();
 
+  // If we are passing a gauge field to the test, we must allocate host memory.
+  // If no gauge is passed, we generate a quenched field on the device.
   gauge_load = strcmp(latfile, "");
   gauge_store = strcmp(gauge_outfile, "");
 
-  // If we are passing a gauge field to the test, we must allocate host memory.
-  // If no gauge is passed, we generate a quenched field on the device.
   if (gauge_load) {
     printfQuda("Loading gauge field from host\n");
     for (int dir = 0; dir < 4; dir++) {
@@ -473,12 +494,6 @@ int main(int argc, char **argv)
     constructHostGaugeField(host_gauge, gauge_param, argc, argv);
   }
 
-  // call srand() with a rank-dependent seed
-  initRand();
-
-  // initialize the QUDA library
-  initQuda(device_ordinal);
-
   // initalize google test, includes command line options
   ::testing::InitGoogleTest(&argc, argv);
 

From 84b675d67073c57cf58165596c2aa810f374d146 Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Tue, 21 Dec 2021 18:20:57 -0800
Subject: [PATCH 27/32] Restrict interface to CPU gauge fields, enforce GPU
 kernel call for GPU fields

---
 lib/interface_quda.cpp   |  41 ++++++------
 tests/gauge_alg_test.cpp | 140 +++++++++++++++++++++++----------------
 2 files changed, 105 insertions(+), 76 deletions(-)

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 355a29cf89..d92b1da7b6 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5524,32 +5524,35 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL);
 
   if (!initialized) errorQuda("QUDA not initialized");
-  printQudaGaugeParam(g_param);
-  printQudaGaugeFixParam(fix_param);
+  if (getVerbosity() == QUDA_DEBUG_VERBOSE) {
+    printQudaGaugeParam(g_param);
+    printQudaGaugeFixParam(fix_param);
+  }
   
   // Check parameters
   checkGaugeParam(g_param);
   checkGaugeFixParam(fix_param);
 
+  if(g_param->location == QUDA_CUDA_FIELD_LOCATION) {
+    errorQuda("GPU gauge fixing not supported via QUDA interface. Please use direct kernel call: gaugeFixingOVR/gaugeFixingFFT");
+  }
+  
   profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
   GaugeFieldParam gauge_param(*g_param, gauge);
-  //printfQuda("CPU start\n");
-  //auto *cpu_gauge = new cpuGaugeField(gauge_param);
-  //printfQuda("CPU done\n");
-  gauge_param.create = QUDA_REFERENCE_FIELD_CREATE;
+  auto *cpu_gauge = new cpuGaugeField(gauge_param);
+
+  // Make GPU field
+  gauge_param.create = QUDA_NULL_FIELD_CREATE;
   gauge_param.link_type = g_param->type;
   gauge_param.reconstruct = g_param->reconstruct;
   gauge_param.setPrecision(gauge_param.Precision(), true);
-  auto *device_gauge = new cudaGaugeField(gauge_param);
-  printfQuda("GPU done\n");
+  auto *device_gauge = new cudaGaugeField(gauge_param);    
   profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
-  
+    
   // Load gauge to device
   profileGaugeFix.TPSTART(QUDA_PROFILE_H2D);
-  //device_gauge->loadCPUField(*cpu_gauge);
+  device_gauge->loadCPUField(*cpu_gauge);
   profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D);
-
-  printfQuda("Perform update\n");
   
   // Perform the update
   switch (fix_param->fix_type) {
@@ -5578,14 +5581,11 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   default: errorQuda("Unkown gauge fix type %d", fix_param->fix_type);
   }
 
-  // Copy the fixed gauge field back to the host if it came
-  // from the host
-  if(g_param->location == QUDA_CPU_FIELD_LOCATION) {
-    profileGaugeFix.TPSTART(QUDA_PROFILE_D2H);
-    //device_gauge->saveCPUField(*cpu_gauge);
-    profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H);
-  }
-  
+  // Copy the fixed gauge field back to the host.
+  profileGaugeFix.TPSTART(QUDA_PROFILE_D2H);
+  device_gauge->saveCPUField(*cpu_gauge);
+  profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H);
+    
   profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL);
 
   if (g_param->make_resident_gauge) {
@@ -5594,6 +5594,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   } else {
     delete device_gauge;
   }
+  delete cpu_gauge;
   
   if(timeinfo){
     timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D);
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index 2f467f7b0d..7648e64321 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -118,8 +118,9 @@ class GaugeAlgTest : public ::testing::Test
   QudaGaugeFixParam fix_param;
 
   host_timer_t host_timer_1, host_timer_2;
-  double2 detu;
-  double3 plaq;
+  double2 det_u;
+  double2 trace_u;
+  double3 plaq_u;
   cudaGaugeField *U;
   int nsteps;
   int nhbsteps;
@@ -215,8 +216,8 @@ class GaugeAlgTest : public ::testing::Test
           unitarizeLinks(*U, num_failures_d);
           qudaDeviceSynchronize();
           if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h);
-          plaq = plaquette(*U);
-          printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
+          plaq_u = plaquette(*U);
+          printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z);
         }
 
         host_timer_2.stop();
@@ -256,19 +257,23 @@ class GaugeAlgTest : public ::testing::Test
         unitarizeLinks(*U, num_failures_d);
         qudaDeviceSynchronize();
         if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h);
-
-        plaq = plaquette(*U);
-        printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
       }
 
-      // If a specific test type is requested, perfrom it now and then
+      // Unfixed Gauge data
+      plaq_u = plaquette(*U);
+      det_u = getLinkDeterminant(*U);
+      trace_u = getLinkTrace(*U);
+      printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z);
+      printfQuda("Det: %.16e:%.16e\n", det_u.x, det_u.y);
+      printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0);      
+      
+      // If a specific test type is requested, perform it now and then
       // turn off all Google tests in the tear down.
       switch (test_type) {
-      case 0:
-        // Do the Google testing
-        break;
-      case 1: run(); break;
-	//case 2: run_fft(); break;
+      case 0: // Do the Google testing
+	break;
+      case 1: // Do a specific test
+	run(); break;
       default: errorQuda("Invalid test type %d ", test_type);
       }
 
@@ -279,21 +284,37 @@ class GaugeAlgTest : public ::testing::Test
   virtual void TearDown()
   {
     if (execute) {
-      detu = getLinkDeterminant(*U);
-      double2 tru = getLinkTrace(*U);
-      printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y);
-      printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0);
+
+      // Compare gauge fixed data with original data
+      auto plaq_gf = plaquette(*U);
+      auto det_gf = getLinkDeterminant(*U);
+      auto trace_gf = getLinkTrace(*U);
+      printfQuda("Plaq:     %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z);
+      printfQuda("Plaq GF:  %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
+      printfQuda("Det:      %.16e, %.16e\n", det_u.x, det_u.y);
+      printfQuda("Det GF:   %.16e, %.16e\n", det_gf.x, det_gf.y);
+      printfQuda("Trace:    %.16e, %.16e\n", trace_u.x / 3.0, trace_u.y / 3.0);
+      printfQuda("Trace GF: %.16e, %.16e\n", trace_gf.x / 3.0, trace_gf.y / 3.0);
+
+      // As an observable, the plaquette value must remain invariant after
+      // gauge fixing.
+      ASSERT_TRUE(comparePlaquette(plaq_u, plaq_gf));
+
+      // The determinant of any SU(N) gauge field element must be (1.0,0.0) to
+      // machine precision
+      ASSERT_TRUE(CheckDeterminant(det_gf));
 
       delete U;
       // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
       PGaugeExchangeFree();
-
+      
       host_timer_1.stop();
       printfQuda("Time -> %.6f s\n", host_timer_1.last());
     }
     // If we performed a specific instance, switch off the
     // Google testing.
     if (test_type != 0) execute = false;
+    saveTuneCache();
   }
 
   virtual void run()
@@ -303,22 +324,48 @@ class GaugeAlgTest : public ::testing::Test
       fix_param = newQudaGaugeFixParam();
       setGaugeFixParam(fix_param);
       
-      // Setup gauge container.
+      printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", get_gaugefix_str(fix_param.fix_type));
+
+      // Setup CPU gauge container.
       gauge_param = newQudaGaugeParam();
       setWilsonGaugeParam(gauge_param);
       gauge_param.t_boundary = QUDA_PERIODIC_T;
-      gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
+      gauge_param.location = QUDA_CPU_FIELD_LOCATION;
       
-      //GaugeFieldParam param(*U);
-      printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", get_gaugefix_str(fix_param.fix_type));
+      void *cpu_gauge[4];
+      for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); }
       
-      computeGaugeFixingQuda(U->Gauge_p(), &gauge_param, &fix_param, nullptr);
+      GaugeFieldParam param(gauge_param);
+      param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
+      param.create = QUDA_NULL_FIELD_CREATE;
+      param.link_type = gauge_param.type;
+      param.reconstruct = gauge_param.reconstruct;
+      param.setPrecision(param.Precision(), true);
       
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-      saveTuneCache();
+      auto *gauge = new cudaGaugeField(param);
+      
+      // Copy the target U field (extended) into regular GPU field, then
+      // save to a CPU field. This is done to test the CPU interface function
+      // and instructs the user how to use void pointers for the gauge data,
+      // and the gauge_param container.
+      copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
+      saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
+      delete gauge;
+      
+      // Compute gauge fixing via interface
+      computeGaugeFixingQuda(cpu_gauge, &gauge_param, &fix_param, nullptr);
+
+      // cpu_gauge now contains the fixed gauge on the CPU. We now load that gauge
+      // to the device for inspection in the TearDown.
+      GaugeFieldParam fixed_param(gauge_param, cpu_gauge);
+      auto *fixed_cpu_gauge = new cpuGaugeField(fixed_param);
+      
+      // Copy the CPU field to U.
+      U->loadCPUField(*fixed_cpu_gauge);     
+
+      for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]);
+      delete fixed_cpu_gauge;
+	
       // Save if output string is specified
       if (gauge_store) save_gauge();
     }
@@ -328,9 +375,6 @@ class GaugeAlgTest : public ::testing::Test
   {
     printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
 
-    //QudaGaugeParam gauge_param = newQudaGaugeParam();
-    //setWilsonGaugeParam(gauge_param);
-
     void *cpu_gauge[4];
     for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); }
 
@@ -359,8 +403,11 @@ class GaugeAlgTest : public ::testing::Test
 TEST_F(GaugeAlgTest, Generation)
 {
   if (execute && !gauge_load) {
-    detu = getLinkDeterminant(*U);
-    ASSERT_TRUE(CheckDeterminant(detu));
+    det_u = getLinkDeterminant(*U);
+    //trace_u = getLinkTrace(*U);
+    //printfQuda("Det: %.16e:%.16e\n", det_u.x, det_u.y);
+    //printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0);
+    ASSERT_TRUE(CheckDeterminant(det_u));
   }
 }
 
@@ -375,12 +422,7 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
     fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
     fix_param.gauge_dir = 4;
     
-    gaugeFixingOVR(*U, fix_param);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-    printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-    saveTuneCache();
+    gaugeFixingOVR(*U, fix_param);    
   }
 }
 
@@ -395,12 +437,7 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
     fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
     fix_param.gauge_dir = 3;
 
-    gaugeFixingOVR(*U, fix_param);
-    auto plaq_gf = plaquette(*U);
-    printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-    printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-    ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-    saveTuneCache();
+    gaugeFixingOVR(*U, fix_param);    
   }
 }
 
@@ -417,11 +454,6 @@ TEST_F(GaugeAlgTest, Landau_FFT)
       fix_param.gauge_dir = 4;
     
       gaugeFixingFFT(*U, fix_param);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-      saveTuneCache();
     }
   }
 }
@@ -439,11 +471,6 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
       fix_param.gauge_dir = 3;
 
       gaugeFixingFFT(*U, fix_param);
-      auto plaq_gf = plaquette(*U);
-      printfQuda("Plaq:    %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z);
-      printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z);
-      ASSERT_TRUE(comparePlaquette(plaq, plaq_gf));
-      saveTuneCache();
     }
   }
 }
@@ -470,10 +497,11 @@ int main(int argc, char **argv)
   // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
   initComms(argc, argv, gridsize_from_cmdline);
   QudaGaugeParam gauge_param = newQudaGaugeParam();
-  setVerbosity(QUDA_VERBOSE);
+  setVerbosity(verbosity);
   setQudaPrecisions();
   setWilsonGaugeParam(gauge_param);
   setDims(gauge_param.X);
+  
   // call srand() with a rank-dependent seed
   initRand();
   // initialize the QUDA library

From 0e020b970cf000910e1f251d80ae053c04d6ee7f Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Tue, 21 Dec 2021 18:55:37 -0800
Subject: [PATCH 28/32] Set gauge fixing parameters once only, use a CL
 supplied seed (updated other tests to use a CL supplied seed)

---
 tests/gauge_alg_test.cpp            | 31 ++++++++++-------------------
 tests/heatbath_test.cpp             |  2 +-
 tests/invert_test.cpp               |  2 +-
 tests/multigrid_evolve_test.cpp     |  2 +-
 tests/staggered_invert_test.cpp     |  2 +-
 tests/utils/command_line_params.cpp |  6 ++++++
 tests/utils/command_line_params.h   |  1 +
 tests/utils/host_utils.cpp          |  2 +-
 8 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index 7648e64321..91e9068e58 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -195,7 +195,7 @@ class GaugeAlgTest : public ::testing::Test
 
         U = new cudaGaugeField(device_gauge_param);
 
-        RNG randstates(*U, 1234);
+        RNG randstates(*U, quda_seed);
 
         nsteps = heatbath_num_steps;
         nhbsteps = heatbath_num_heatbath_per_step;
@@ -271,6 +271,10 @@ class GaugeAlgTest : public ::testing::Test
       // turn off all Google tests in the tear down.
       switch (test_type) {
       case 0: // Do the Google testing
+	// Set gauge fixing params from the command line
+	// and adjust for this test type
+	fix_param = newQudaGaugeFixParam();
+	setGaugeFixParam(fix_param);
 	break;
       case 1: // Do a specific test
 	run(); break;
@@ -403,10 +407,9 @@ class GaugeAlgTest : public ::testing::Test
 TEST_F(GaugeAlgTest, Generation)
 {
   if (execute && !gauge_load) {
+    // Assert that the generated gauge is
+    // on the SU(N) manifold
     det_u = getLinkDeterminant(*U);
-    //trace_u = getLinkTrace(*U);
-    //printfQuda("Det: %.16e:%.16e\n", det_u.x, det_u.y);
-    //printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0);
     ASSERT_TRUE(CheckDeterminant(det_u));
   }
 }
@@ -415,10 +418,7 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
   if (execute) {
     printfQuda("Landau gauge fixing with overrelaxation\n");
-    // Set gauge fixing params from the command line
-    // and adjust for this test type
-    fix_param = newQudaGaugeFixParam();
-    setGaugeFixParam(fix_param);
+    
     fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
     fix_param.gauge_dir = 4;
     
@@ -430,10 +430,7 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
   if (execute) {
     printfQuda("Coulomb gauge fixing with overrelaxation\n");
-    // Use gauge fixing params from the command line
-    // and adjust for this test type
-    fix_param = newQudaGaugeFixParam();
-    setGaugeFixParam(fix_param);
+    
     fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
     fix_param.gauge_dir = 3;
 
@@ -446,10 +443,7 @@ TEST_F(GaugeAlgTest, Landau_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Landau gauge fixing with steepest descent method with FFT\n");
-      // Set gauge fixing params from the command line
-      // and adjust for this test type
-      fix_param = newQudaGaugeFixParam();
-      setGaugeFixParam(fix_param);
+      
       fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT;
       fix_param.gauge_dir = 4;
     
@@ -463,10 +457,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n");
-      // Set gauge fixing params from the command line
-      // and adjust for this test type
-      fix_param = newQudaGaugeFixParam();
-      setGaugeFixParam(fix_param);
+      
       fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT;
       fix_param.gauge_dir = 3;
 
diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp
index 033ff597df..300f0b1012 100644
--- a/tests/heatbath_test.cpp
+++ b/tests/heatbath_test.cpp
@@ -125,7 +125,7 @@ int main(int argc, char **argv)
     for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir];
     cudaGaugeField *gaugeEx = new cudaGaugeField(gParamEx);
     // CURAND random generator initialization
-    RNG *randstates = new RNG(*gauge, 1234);
+    RNG *randstates = new RNG(*gauge, quda_seed);
 
     int nsteps = heatbath_num_steps;
     int nwarm = heatbath_warmup_steps;
diff --git a/tests/invert_test.cpp b/tests/invert_test.cpp
index e559f04520..1ba40618c9 100644
--- a/tests/invert_test.cpp
+++ b/tests/invert_test.cpp
@@ -304,7 +304,7 @@ int main(int argc, char **argv)
   std::vector<double> gflops(Nsrc);
   std::vector<int> iter(Nsrc);
 
-  auto *rng = new quda::RNG(*check, 1234);
+  auto *rng = new quda::RNG(*check, quda_seed);
 
   for (int i = 0; i < Nsrc; i++) {
     // Populate the host spinor with random numbers.
diff --git a/tests/multigrid_evolve_test.cpp b/tests/multigrid_evolve_test.cpp
index de6e3f22d9..fb63443ae8 100644
--- a/tests/multigrid_evolve_test.cpp
+++ b/tests/multigrid_evolve_test.cpp
@@ -244,7 +244,7 @@ int main(int argc, char **argv)
     obs_param.compute_qcharge = QUDA_BOOLEAN_TRUE;
 
     // CURAND random generator initialization
-    RNG *randstates = new RNG(*gauge, 1234);
+    RNG *randstates = new RNG(*gauge, quda_seed);
     int nsteps = 10;
     int nhbsteps = 1;
     int novrsteps = 1;
diff --git a/tests/staggered_invert_test.cpp b/tests/staggered_invert_test.cpp
index b2f4e96588..a0a39c7adb 100644
--- a/tests/staggered_invert_test.cpp
+++ b/tests/staggered_invert_test.cpp
@@ -299,7 +299,7 @@ int main(int argc, char **argv)
   //-----------------------------------------------------------------------------------
 
   // Prepare rng
-  auto *rng = new quda::RNG(*ref, 1234);
+  auto *rng = new quda::RNG(*ref, quda_seed);
 
   // Performance measuring
   std::vector<double> time(Nsrc);
diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp
index 5dd4ab8665..1201590521 100644
--- a/tests/utils/command_line_params.cpp
+++ b/tests/utils/command_line_params.cpp
@@ -9,6 +9,8 @@ int device_ordinal = -1;
 int device_ordinal = 0;
 #endif
 
+int quda_seed = 1234;
+
 int rank_order;
 std::array<int, 4> gridsize_from_cmdline = {1, 1, 1, 1};
 auto &grid_x = gridsize_from_cmdline[0];
@@ -501,6 +503,10 @@ std::shared_ptr<QUDAApp> make_app(std::string app_description, std::string app_n
   quda_app->add_option("--save-gauge", gauge_outfile,
                        "Save gauge field \" file \" for the test (requires QIO, heatbath test only)");
 
+  quda_app->add_option("--seed", quda_seed,
+		       "Seed value for use in test suite (default 1234)")
+    ->check(CLI::PositiveNumber);
+  
   quda_app->add_option("--solution-pipeline", solution_accumulator_pipeline,
                        "The pipeline length for fused solution accumulation (default 0, no pipelining)");
 
diff --git a/tests/utils/command_line_params.h b/tests/utils/command_line_params.h
index 10c8d775f2..77ac51f4cf 100644
--- a/tests/utils/command_line_params.h
+++ b/tests/utils/command_line_params.h
@@ -151,6 +151,7 @@ template <typename T> std::string inline get_string(CLI::TransformPairs<T> &map,
 // }
 // parameters
 
+extern int quda_seed;
 extern int device_ordinal;
 extern int rank_order;
 extern bool native_blas_lapack;
diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp
index c961c85e0e..ff3d60ced5 100644
--- a/tests/utils/host_utils.cpp
+++ b/tests/utils/host_utils.cpp
@@ -320,7 +320,7 @@ void initRand()
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 #endif
 
-  srand(17 * rank + 137);
+  srand(17 * rank + 137 + quda_seed);
 }
 
 void setDims(int *X)

From 56725151650cad3ac0047bf76be8e84b0642d0ed Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Tue, 21 Dec 2021 19:02:13 -0800
Subject: [PATCH 29/32] Develop merge, clang-format

---
 include/gauge_tools.h               |  2 +-
 include/quda.h                      |  2 +-
 lib/check_params.h                  |  2 +-
 lib/interface_quda.cpp              | 21 +++----
 tests/gauge_alg_test.cpp            | 85 ++++++++++++++++-------------
 tests/utils/command_line_params.cpp |  6 +-
 tests/utils/misc.cpp                |  2 +-
 7 files changed, 63 insertions(+), 57 deletions(-)

diff --git a/include/gauge_tools.h b/include/gauge_tools.h
index b53134ecfc..e2393cf239 100644
--- a/include/gauge_tools.h
+++ b/include/gauge_tools.h
@@ -116,7 +116,7 @@ namespace quda
    * @param[in] fix_param Parameter struct that defines the gauge fixing
    */
   void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param);
-  
+
   /**
    * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
    * @param[in,out] data, quda gauge field
diff --git a/include/quda.h b/include/quda.h
index 02b8601809..0b7d633ec9 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -777,7 +777,7 @@ extern "C" {
     QudaBLASDataType data_type;   /**< Specifies if using S(C) or D(Z) BLAS type */
     QudaBLASDataOrder data_order; /**< Specifies if using Row or Column major */
   } QudaBLASParam;
-    
+
   typedef struct QudaGaugeFixParam_s {
     size_t struct_size; /**< Size of this struct in bytes.  Used to ensure that the host application and QUDA see the same struct size */
 
diff --git a/lib/check_params.h b/lib/check_params.h
index 0f2982f8eb..b49cf04cb0 100644
--- a/lib/check_params.h
+++ b/lib/check_params.h
@@ -1079,7 +1079,7 @@ void printQudaGaugeFixParam(QudaGaugeFixParam *param)
   P(fft_autotune, QUDA_BOOLEAN_FALSE);
   P(theta_condition, QUDA_BOOLEAN_FALSE);
 #endif
-  
+
 #ifdef INIT_PARAM
   return ret;
 #endif
diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index d92b1da7b6..28fe842090 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5528,15 +5528,16 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
     printQudaGaugeParam(g_param);
     printQudaGaugeFixParam(fix_param);
   }
-  
+
   // Check parameters
   checkGaugeParam(g_param);
   checkGaugeFixParam(fix_param);
 
-  if(g_param->location == QUDA_CUDA_FIELD_LOCATION) {
-    errorQuda("GPU gauge fixing not supported via QUDA interface. Please use direct kernel call: gaugeFixingOVR/gaugeFixingFFT");
+  if (g_param->location == QUDA_CUDA_FIELD_LOCATION) {
+    errorQuda("GPU gauge fixing not supported via QUDA interface. Please use direct kernel call: "
+              "gaugeFixingOVR/gaugeFixingFFT");
   }
-  
+
   profileGaugeFix.TPSTART(QUDA_PROFILE_INIT);
   GaugeFieldParam gauge_param(*g_param, gauge);
   auto *cpu_gauge = new cpuGaugeField(gauge_param);
@@ -5546,14 +5547,14 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   gauge_param.link_type = g_param->type;
   gauge_param.reconstruct = g_param->reconstruct;
   gauge_param.setPrecision(gauge_param.Precision(), true);
-  auto *device_gauge = new cudaGaugeField(gauge_param);    
+  auto *device_gauge = new cudaGaugeField(gauge_param);
   profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
-    
+
   // Load gauge to device
   profileGaugeFix.TPSTART(QUDA_PROFILE_H2D);
   device_gauge->loadCPUField(*cpu_gauge);
   profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D);
-  
+
   // Perform the update
   switch (fix_param->fix_type) {
 
@@ -5585,7 +5586,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   profileGaugeFix.TPSTART(QUDA_PROFILE_D2H);
   device_gauge->saveCPUField(*cpu_gauge);
   profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H);
-    
+
   profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL);
 
   if (g_param->make_resident_gauge) {
@@ -5595,8 +5596,8 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
     delete device_gauge;
   }
   delete cpu_gauge;
-  
-  if(timeinfo){
+
+  if (timeinfo) {
     timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D);
     timeinfo[1] = profileGaugeFix.Last(QUDA_PROFILE_COMPUTE);
     timeinfo[2] = profileGaugeFix.Last(QUDA_PROFILE_D2H);
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index 91e9068e58..5506f968d7 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -56,10 +56,12 @@ void display_test_info()
 
   switch (test_type) {
   case 0: printfQuda("\n Google testing\n"); break;
-  case 1: printfQuda("\n%s %s gauge fix\n", get_gaugefix_str(gf_fix_type), gf_gauge_dir == 4 ? "Landau" : "Coulomb"); break;
+  case 1:
+    printfQuda("\n%s %s gauge fix\n", get_gaugefix_str(gf_fix_type), gf_gauge_dir == 4 ? "Landau" : "Coulomb");
+    break;
   default: errorQuda("Undefined test type %d given", test_type);
   }
-  
+
   printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
   printfQuda("%s   %s             %s            %s            %d/%d/%d          %d         %d\n", get_prec_str(prec),
              get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim,
@@ -97,7 +99,8 @@ void add_gaugefix_option_group(std::shared_ptr<QUDAApp> quda_app)
     ->transform(CLI::QUDACheckedTransformer(fix_type_map));
 }
 
-void setGaugeFixParam(QudaGaugeFixParam &fix_param) {
+void setGaugeFixParam(QudaGaugeFixParam &fix_param)
+{
   fix_param.fix_type = gf_fix_type;
   fix_param.gauge_dir = gf_gauge_dir;
   fix_param.maxiter = gf_maxiter;
@@ -168,7 +171,7 @@ class GaugeAlgTest : public ::testing::Test
   virtual void SetUp()
   {
     if (execute) {
-      
+
       // Setup gauge container.
       gauge_param = newQudaGaugeParam();
       setWilsonGaugeParam(gauge_param);
@@ -183,7 +186,7 @@ class GaugeAlgTest : public ::testing::Test
 
       // If no field is loaded, create a physical quenched field on the device
       if (!gauge_load) {
-	GaugeFieldParam device_gauge_param(gauge_param);
+        GaugeFieldParam device_gauge_param(gauge_param);
         device_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED;
         device_gauge_param.create = QUDA_NULL_FIELD_CREATE;
         device_gauge_param.reconstruct = link_recon;
@@ -203,10 +206,12 @@ class GaugeAlgTest : public ::testing::Test
         coldstart = heatbath_coldstart;
         beta_value = heatbath_beta_value;
         host_timer_2.start();
-	
-        if (coldstart) InitGaugeField(*U);
-        else InitGaugeField(*U, randstates);
-	
+
+        if (coldstart)
+          InitGaugeField(*U);
+        else
+          InitGaugeField(*U, randstates);
+
         for (int step = 1; step <= nsteps; ++step) {
           printfQuda("Step %d\n", step);
           Monte(*U, randstates, beta_value, nhbsteps, novrsteps);
@@ -265,19 +270,20 @@ class GaugeAlgTest : public ::testing::Test
       trace_u = getLinkTrace(*U);
       printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z);
       printfQuda("Det: %.16e:%.16e\n", det_u.x, det_u.y);
-      printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0);      
-      
+      printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0);
+
       // If a specific test type is requested, perform it now and then
       // turn off all Google tests in the tear down.
       switch (test_type) {
       case 0: // Do the Google testing
-	// Set gauge fixing params from the command line
-	// and adjust for this test type
-	fix_param = newQudaGaugeFixParam();
-	setGaugeFixParam(fix_param);
-	break;
+        // Set gauge fixing params from the command line
+        // and adjust for this test type
+        fix_param = newQudaGaugeFixParam();
+        setGaugeFixParam(fix_param);
+        break;
       case 1: // Do a specific test
-	run(); break;
+        run();
+        break;
       default: errorQuda("Invalid test type %d ", test_type);
       }
 
@@ -311,7 +317,7 @@ class GaugeAlgTest : public ::testing::Test
       delete U;
       // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
       PGaugeExchangeFree();
-      
+
       host_timer_1.stop();
       printfQuda("Time -> %.6f s\n", host_timer_1.last());
     }
@@ -327,27 +333,28 @@ class GaugeAlgTest : public ::testing::Test
       // Set gauge fixing params from the command line
       fix_param = newQudaGaugeFixParam();
       setGaugeFixParam(fix_param);
-      
-      printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", get_gaugefix_str(fix_param.fix_type));
+
+      printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb",
+                 get_gaugefix_str(fix_param.fix_type));
 
       // Setup CPU gauge container.
       gauge_param = newQudaGaugeParam();
       setWilsonGaugeParam(gauge_param);
       gauge_param.t_boundary = QUDA_PERIODIC_T;
       gauge_param.location = QUDA_CPU_FIELD_LOCATION;
-      
+
       void *cpu_gauge[4];
       for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); }
-      
+
       GaugeFieldParam param(gauge_param);
       param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
       param.create = QUDA_NULL_FIELD_CREATE;
       param.link_type = gauge_param.type;
       param.reconstruct = gauge_param.reconstruct;
       param.setPrecision(param.Precision(), true);
-      
+
       auto *gauge = new cudaGaugeField(param);
-      
+
       // Copy the target U field (extended) into regular GPU field, then
       // save to a CPU field. This is done to test the CPU interface function
       // and instructs the user how to use void pointers for the gauge data,
@@ -355,7 +362,7 @@ class GaugeAlgTest : public ::testing::Test
       copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION);
       saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param);
       delete gauge;
-      
+
       // Compute gauge fixing via interface
       computeGaugeFixingQuda(cpu_gauge, &gauge_param, &fix_param, nullptr);
 
@@ -363,18 +370,18 @@ class GaugeAlgTest : public ::testing::Test
       // to the device for inspection in the TearDown.
       GaugeFieldParam fixed_param(gauge_param, cpu_gauge);
       auto *fixed_cpu_gauge = new cpuGaugeField(fixed_param);
-      
+
       // Copy the CPU field to U.
-      U->loadCPUField(*fixed_cpu_gauge);     
+      U->loadCPUField(*fixed_cpu_gauge);
 
       for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]);
       delete fixed_cpu_gauge;
-	
+
       // Save if output string is specified
       if (gauge_store) save_gauge();
     }
   }
-  
+
   virtual void save_gauge()
   {
     printfQuda("Saving the gauge field to file %s\n", gauge_outfile);
@@ -418,11 +425,11 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
   if (execute) {
     printfQuda("Landau gauge fixing with overrelaxation\n");
-    
+
     fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
     fix_param.gauge_dir = 4;
-    
-    gaugeFixingOVR(*U, fix_param);    
+
+    gaugeFixingOVR(*U, fix_param);
   }
 }
 
@@ -430,11 +437,11 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
   if (execute) {
     printfQuda("Coulomb gauge fixing with overrelaxation\n");
-    
+
     fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
     fix_param.gauge_dir = 3;
 
-    gaugeFixingOVR(*U, fix_param);    
+    gaugeFixingOVR(*U, fix_param);
   }
 }
 
@@ -443,10 +450,10 @@ TEST_F(GaugeAlgTest, Landau_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Landau gauge fixing with steepest descent method with FFT\n");
-      
+
       fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT;
       fix_param.gauge_dir = 4;
-    
+
       gaugeFixingFFT(*U, fix_param);
     }
   }
@@ -457,7 +464,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
   if (execute) {
     if (!comm_partitioned()) {
       printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n");
-      
+
       fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT;
       fix_param.gauge_dir = 3;
 
@@ -492,12 +499,12 @@ int main(int argc, char **argv)
   setQudaPrecisions();
   setWilsonGaugeParam(gauge_param);
   setDims(gauge_param.X);
-  
+
   // call srand() with a rank-dependent seed
   initRand();
   // initialize the QUDA library
   initQuda(device_ordinal);
-  
+
   display_test_info();
 
   // If we are passing a gauge field to the test, we must allocate host memory.
diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp
index 1201590521..4a500bdab7 100644
--- a/tests/utils/command_line_params.cpp
+++ b/tests/utils/command_line_params.cpp
@@ -503,10 +503,8 @@ std::shared_ptr<QUDAApp> make_app(std::string app_description, std::string app_n
   quda_app->add_option("--save-gauge", gauge_outfile,
                        "Save gauge field \" file \" for the test (requires QIO, heatbath test only)");
 
-  quda_app->add_option("--seed", quda_seed,
-		       "Seed value for use in test suite (default 1234)")
-    ->check(CLI::PositiveNumber);
-  
+  quda_app->add_option("--seed", quda_seed, "Seed value for use in test suite (default 1234)")->check(CLI::PositiveNumber);
+
   quda_app->add_option("--solution-pipeline", solution_accumulator_pipeline,
                        "The pipeline length for fused solution accumulation (default 0, no pipelining)");
 
diff --git a/tests/utils/misc.cpp b/tests/utils/misc.cpp
index d9c0537bc3..714b75f862 100644
--- a/tests/utils/misc.cpp
+++ b/tests/utils/misc.cpp
@@ -160,7 +160,7 @@ const char *get_gaugefix_str(QudaGaugeFixType type)
   case QUDA_GAUGEFIX_TYPE_FFT: ret = "FFT"; break;
   default: ret = "unknown"; break;
   }
-  
+
   return ret;
 }
 

From 984c45a8208db3c5d78f6de0fd4df1a5663bc4ac Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Tue, 21 Dec 2021 23:24:10 -0800
Subject: [PATCH 30/32] Add precision parameter to QudaGaugeFixParam to specify
 gauge fixing precision, fix bug where gauge field boundary is not set to
 periodic

---
 include/quda.h             |  2 ++
 lib/gauge_fix_ovr.cu       |  1 +
 lib/interface_quda.cpp     |  2 +-
 tests/gauge_alg_test.cpp   | 23 +++++++++++++++--------
 tests/utils/host_utils.cpp |  7 +++++--
 5 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/include/quda.h b/include/quda.h
index 0b7d633ec9..a706b46090 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -792,6 +792,8 @@ extern "C" {
     double tolerance;            /**< The tolerance of the gauge fixing quality (default 1e-6) */
     QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the
                                     delta value (default false)" */
+    QudaPrecision precision;     /**< The precision used by the algorithm */
+    
   } QudaGaugeFixParam;
 
   /*
diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu
index aff0df2fef..38b7948f99 100644
--- a/lib/gauge_fix_ovr.cu
+++ b/lib/gauge_fix_ovr.cu
@@ -245,6 +245,7 @@ namespace quda {
       printfQuda("\tMaximum number of iterations: %d\n", steps);
       printfQuda("\tReunitarize at every %d steps\n", reunit_interval);
       printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval);
+      printfQuda("\tComputing in %s precision\n", sizeof(Float) == sizeof(double) ? "double" : "single");
     }
     
     const double unitarize_eps = 1e-14;
diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 28fe842090..59c63d9b1a 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -5546,7 +5546,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar
   gauge_param.create = QUDA_NULL_FIELD_CREATE;
   gauge_param.link_type = g_param->type;
   gauge_param.reconstruct = g_param->reconstruct;
-  gauge_param.setPrecision(gauge_param.Precision(), true);
+  gauge_param.setPrecision(fix_param->precision, true);
   auto *device_gauge = new cudaGaugeField(gauge_param);
   profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT);
 
diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index 5506f968d7..c64011341f 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -111,6 +111,7 @@ void setGaugeFixParam(QudaGaugeFixParam &fix_param)
   fix_param.fft_alpha = gf_fft_alpha;
   fix_param.fft_autotune = gf_fft_alpha ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
   fix_param.theta_condition = gf_theta_condition ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE;
+  fix_param.precision = cuda_prec;
 }
 
 class GaugeAlgTest : public ::testing::Test
@@ -161,11 +162,11 @@ class GaugeAlgTest : public ::testing::Test
     return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
   }
 
-  bool CheckDeterminant(double2 detu)
+  bool CheckDeterminant(double2 det)
   {
-    double prec_val = 5e-8;
+    double prec_val = 1.0e-5;
     if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2;
-    return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val);
+    return (std::abs(1.0 - det.x) < prec_val && std::abs(det.y) < prec_val);
   }
 
   virtual void SetUp()
@@ -190,7 +191,7 @@ class GaugeAlgTest : public ::testing::Test
         device_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED;
         device_gauge_param.create = QUDA_NULL_FIELD_CREATE;
         device_gauge_param.reconstruct = link_recon;
-        device_gauge_param.setPrecision(prec, true);
+        device_gauge_param.setPrecision(cuda_prec, true);
         for (int d = 0; d < 4; d++) {
           if (comm_dim_partitioned(d)) device_gauge_param.r[d] = 2;
           device_gauge_param.x[d] += 2 * device_gauge_param.r[d];
@@ -344,14 +345,14 @@ class GaugeAlgTest : public ::testing::Test
       gauge_param.location = QUDA_CPU_FIELD_LOCATION;
 
       void *cpu_gauge[4];
-      for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); }
+      for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * cpu_prec); }
 
       GaugeFieldParam param(gauge_param);
       param.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
       param.create = QUDA_NULL_FIELD_CREATE;
       param.link_type = gauge_param.type;
       param.reconstruct = gauge_param.reconstruct;
-      param.setPrecision(param.Precision(), true);
+      param.setPrecision(cuda_prec, true);
 
       auto *gauge = new cudaGaugeField(param);
 
@@ -379,6 +380,7 @@ class GaugeAlgTest : public ::testing::Test
 
       // Save if output string is specified
       if (gauge_store) save_gauge();
+      saveTuneCache();
     }
   }
 
@@ -424,24 +426,26 @@ TEST_F(GaugeAlgTest, Generation)
 TEST_F(GaugeAlgTest, Landau_Overrelaxation)
 {
   if (execute) {
-    printfQuda("Landau gauge fixing with overrelaxation\n");
+    printfQuda("Landau gauge fixing with overrelaxation method\n");
 
     fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
     fix_param.gauge_dir = 4;
 
     gaugeFixingOVR(*U, fix_param);
+    saveTuneCache();
   }
 }
 
 TEST_F(GaugeAlgTest, Coulomb_Overrelaxation)
 {
   if (execute) {
-    printfQuda("Coulomb gauge fixing with overrelaxation\n");
+    printfQuda("Coulomb gauge fixing with overrelaxation method\n");
 
     fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR;
     fix_param.gauge_dir = 3;
 
     gaugeFixingOVR(*U, fix_param);
+    saveTuneCache();
   }
 }
 
@@ -455,6 +459,7 @@ TEST_F(GaugeAlgTest, Landau_FFT)
       fix_param.gauge_dir = 4;
 
       gaugeFixingFFT(*U, fix_param);
+      saveTuneCache();
     }
   }
 }
@@ -469,6 +474,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT)
       fix_param.gauge_dir = 3;
 
       gaugeFixingFFT(*U, fix_param);
+      saveTuneCache();
     }
   }
 }
@@ -498,6 +504,7 @@ int main(int argc, char **argv)
   setVerbosity(verbosity);
   setQudaPrecisions();
   setWilsonGaugeParam(gauge_param);
+  gauge_param.t_boundary = QUDA_PERIODIC_T;
   setDims(gauge_param.X);
 
   // call srand() with a rank-dependent seed
diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp
index ff3d60ced5..640b501c70 100644
--- a/tests/utils/host_utils.cpp
+++ b/tests/utils/host_utils.cpp
@@ -980,8 +980,11 @@ int x4_from_full_index(int i)
 template <typename Float> void applyGaugeFieldScaling(Float **gauge, int Vh, QudaGaugeParam *param)
 {
   // Apply spatial scaling factor (u0) to spatial links
-  for (int d = 0; d < 3; d++) {
-    for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] /= param->anisotropy; }
+  if(param->anisotropy != 1.0) {
+    double aniso_inv = 1.0/param->anisotropy;
+    for (int d = 0; d < 3; d++) {
+      for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] *= aniso_inv; }
+    }
   }
 
   // Apply boundary conditions to temporal links

From 7e83e14921ac9d17406d48bc8de86bfb872c341c Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Tue, 21 Dec 2021 23:24:37 -0800
Subject: [PATCH 31/32] clang-format: whitespace cleanup

---
 include/quda.h             | 2 +-
 tests/utils/host_utils.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/quda.h b/include/quda.h
index a706b46090..ced9d2dc25 100644
--- a/include/quda.h
+++ b/include/quda.h
@@ -793,7 +793,7 @@ extern "C" {
     QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the
                                     delta value (default false)" */
     QudaPrecision precision;     /**< The precision used by the algorithm */
-    
+
   } QudaGaugeFixParam;
 
   /*
diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp
index 640b501c70..982ebd66cd 100644
--- a/tests/utils/host_utils.cpp
+++ b/tests/utils/host_utils.cpp
@@ -980,8 +980,8 @@ int x4_from_full_index(int i)
 template <typename Float> void applyGaugeFieldScaling(Float **gauge, int Vh, QudaGaugeParam *param)
 {
   // Apply spatial scaling factor (u0) to spatial links
-  if(param->anisotropy != 1.0) {
-    double aniso_inv = 1.0/param->anisotropy;
+  if (param->anisotropy != 1.0) {
+    double aniso_inv = 1.0 / param->anisotropy;
     for (int d = 0; d < 3; d++) {
       for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] *= aniso_inv; }
     }

From b1e25b05b7f0842614630f4f09637aa1383a743d Mon Sep 17 00:00:00 2001
From: cpviolator <dmhowarth26@gmail.com>
Date: Tue, 21 Dec 2021 23:43:45 -0800
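Subject: [PATCH 32/32] Adjust determinant tolerance for double precision

Replace the gf_tolerance-derived double precision thresholds in
tests/gauge_alg_test.cpp with fixed values: 1e-10 for the three-component
comparison and 1e-8 for the determinant check. Also rename
CheckDeterminant to checkDeterminant and update its call sites.
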
---
 tests/gauge_alg_test.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp
index c64011341f..e4ef084b2d 100644
--- a/tests/gauge_alg_test.cpp
+++ b/tests/gauge_alg_test.cpp
@@ -158,14 +158,14 @@ class GaugeAlgTest : public ::testing::Test
     a1 = std::abs(a.y - b.y);
     a2 = std::abs(a.z - b.z);
     double prec_val = 1.0e-5;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1e-10;
     return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val));
   }
 
-  bool CheckDeterminant(double2 det)
+  bool checkDeterminant(double2 det)
   {
     double prec_val = 1.0e-5;
-    if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2;
+    if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1e-8;
     return (std::abs(1.0 - det.x) < prec_val && std::abs(det.y) < prec_val);
   }
 
@@ -313,7 +313,7 @@ class GaugeAlgTest : public ::testing::Test
 
       // The determinant of any SU(N) gauge field element must be (1.0,0.0) to
       // machine precision
-      ASSERT_TRUE(CheckDeterminant(det_gf));
+      ASSERT_TRUE(checkDeterminant(det_gf));
 
       delete U;
       // Release all temporary memory used for data exchange between GPUs in multi-GPU mode
@@ -419,7 +419,7 @@ TEST_F(GaugeAlgTest, Generation)
     // Assert that the generated gauge is
     // on the SU(N) manifold
     det_u = getLinkDeterminant(*U);
-    ASSERT_TRUE(CheckDeterminant(det_u));
+    ASSERT_TRUE(checkDeterminant(det_u));
   }
 }