From 66130f734283e0d696b06df2c7ad54e92a4ac1fc Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 30 Apr 2021 17:34:21 -0700 Subject: [PATCH 01/32] Add gf test interface to allow fine grained control over the GF testing --- tests/gauge_alg_test.cpp | 64 ++++++++++++------------ tests/heatbath_test.cpp | 3 +- tests/utils/command_line_params.cpp | 75 ++++++++++++++++++++++------- tests/utils/command_line_params.h | 12 +++++ 4 files changed, 102 insertions(+), 52 deletions(-) diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index ebfcaaa0b8..410d2304bd 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -21,8 +21,25 @@ using namespace quda; -class GaugeAlgTest : public ::testing::Test { +class GaugeAlgTest : public ::testing::Test +{ protected: + + QudaGaugeParam param; + + Timer a0,a1; + double2 detu; + double3 plaq; + cudaGaugeField *U; + int nsteps; + int nhbsteps; + int novrsteps; + bool coldstart; + double beta_value; + + RNG * randstates; + + void SetReunitarizationConsts(){ const double unitarize_eps = 1e-14; const double max_error = 1e-10; @@ -118,11 +135,11 @@ class GaugeAlgTest : public ::testing::Test { randstates = new RNG(gParam, 1234); randstates->Init(); - nsteps = 10; - nhbsteps = 4; - novrsteps = 4; - coldstart = false; - beta_value = 6.2; + nsteps = heatbath_num_steps; + nhbsteps = heatbath_num_heatbath_per_step; + novrsteps = heatbath_num_overrelax_per_step; + coldstart = heatbath_coldstart; + beta_value = heatbath_beta_value; a0.Start(__func__, __FILE__, __LINE__); a1.Start(__func__, __FILE__, __LINE__); @@ -175,38 +192,18 @@ class GaugeAlgTest : public ::testing::Test { randstates->Release(); delete randstates; } - - QudaGaugeParam param; - - Timer a0,a1; - double2 detu; - double3 plaq; - cudaGaugeField *U; - int nsteps; - int nhbsteps; - int novrsteps; - bool coldstart; - double beta_value; - RNG * randstates; - }; TEST_F(GaugeAlgTest, Generation) { detu = getLinkDeterminant(*U); - plaq = plaquette(*U); - bool 
testgen = false; - //check plaquette value for beta = 6.2 - if (plaq.x < 0.614 && plaq.x > 0.611 && plaq.y < 0.614 && plaq.y > 0.611) testgen = true; - - if (testgen) { ASSERT_TRUE(CheckDeterminant(detu)); } + ASSERT_TRUE(CheckDeterminant(detu)); } TEST_F(GaugeAlgTest, Landau_Overrelaxation) { - const int reunit_interval = 10; printfQuda("Landau gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 4, 100, 10, 1.5, 0, reunit_interval, 1); + gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -214,9 +211,8 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { - const int reunit_interval = 10; printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, 100, 10, 1.5, 0, reunit_interval, 1); + gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -226,7 +222,7 @@ TEST_F(GaugeAlgTest, Landau_FFT) { if (!checkDimsPartitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, 100, 10, 0.08, 0, 0, 1); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -237,7 +233,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) { if (!checkDimsPartitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 3, 100, 10, 0.08, 0, 0, 1); + 
gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -252,8 +248,10 @@ int main(int argc, char **argv) int test_rc = 0; xdim=ydim=zdim=tdim=32; - // command line options + // command line options auto app = make_app(); + add_gaugefix_option_group(app); + add_heatbath_option_group(app); try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp index 98e69f613b..a0734f1516 100644 --- a/tests/heatbath_test.cpp +++ b/tests/heatbath_test.cpp @@ -53,9 +53,10 @@ void display_test_info() } int main(int argc, char **argv) -{ +{ // command line options auto app = make_app(); + add_heatbath_option_group(app); try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp index 0a300b19af..b0e312b69d 100644 --- a/tests/utils/command_line_params.cpp +++ b/tests/utils/command_line_params.cpp @@ -221,6 +221,12 @@ quda::mgarray mg_eig_save_prec = {}; bool mg_eig_coarse_guess = false; bool mg_eig_preserve_deflation = false; +int eofa_pm = 1; +double eofa_shift = -1.2345; +double eofa_mq1 = 1.0; +double eofa_mq2 = 0.085; +double eofa_mq3 = 1.0; + double heatbath_beta_value = 6.2; int heatbath_warmup_steps = 10; int heatbath_num_steps = 10; @@ -228,12 +234,6 @@ int heatbath_num_heatbath_per_step = 5; int heatbath_num_overrelax_per_step = 5; bool heatbath_coldstart = false; -int eofa_pm = 1; -double eofa_shift = -1.2345; -double eofa_mq1 = 1.0; -double eofa_mq2 = 0.085; -double eofa_mq3 = 1.0; - double stout_smear_rho = 0.1; double stout_smear_epsilon = -0.25; double ape_smear_rho = 0.6; @@ -243,6 +243,16 @@ int wflow_steps = 100; QudaWFlowType wflow_type = QUDA_WFLOW_TYPE_WILSON; int 
measurement_interval = 5; +int gf_gauge_dir = 4; +int gf_maxiter = 10000; +int gf_verbosity_interval = 100; +double gf_ovr_relaxation_boost = 1.5; +double gf_fft_alpha = 0.8; +int gf_reunit_interval = 10; +double gf_tolerance = 1e-6; +bool gf_theta_condition = false; +bool gf_fft_autotune = false; + QudaContractType contract_type = QUDA_CONTRACT_TYPE_OPEN; std::array grid_partition = {1, 1, 1, 1}; @@ -495,18 +505,6 @@ std::shared_ptr make_app(std::string app_description, std::string app_n quda_app->add_option("--gaussian-sigma", gaussian_sigma, "Width of the Gaussian noise used for random gauge field contruction (default 0.2)"); - quda_app->add_option("--heatbath-beta", heatbath_beta_value, "Beta value used in heatbath test (default 6.2)"); - quda_app->add_option("--heatbath-coldstart", heatbath_coldstart, - "Whether to use a cold or hot start in heatbath test (default false)"); - quda_app->add_option("--heatbath-num-hb-per-step", heatbath_num_heatbath_per_step, - "Number of heatbath hits per heatbath step (default 5)"); - quda_app->add_option("--heatbath-num-or-per-step", heatbath_num_overrelax_per_step, - "Number of overrelaxation hits per heatbath step (default 5)"); - quda_app->add_option("--heatbath-num-steps", heatbath_num_steps, - "Number of measurement steps in heatbath test (default 10)"); - quda_app->add_option("--heatbath-warmup-steps", heatbath_warmup_steps, - "Number of warmup steps in heatbath test (default 10)"); - quda_app->add_option("--inv-type", inv_type, "The type of solver to use (default cg)") ->transform(CLI::QUDACheckedTransformer(inverter_type_map)); quda_app->add_option("--inv-deflate", inv_deflate, "Deflate the inverter using the eigensolver"); @@ -1016,6 +1014,47 @@ void add_su3_option_group(std::shared_ptr quda_app) "Measure the field energy and topological charge every Nth step (default 5) "); } +void add_heatbath_option_group(std::shared_ptr quda_app) +{ + // Option group for heatbath related options + auto opgroup = 
quda_app->add_option_group("heatbath", "Options controlling heatbath tests"); + opgroup->add_option("--heatbath-beta", heatbath_beta_value, "Beta value used in heatbath test (default 6.2)"); + opgroup->add_option("--heatbath-coldstart", heatbath_coldstart, + "Whether to use a cold or hot start in heatbath test (default false)"); + opgroup->add_option("--heatbath-num-hb-per-step", heatbath_num_heatbath_per_step, + "Number of heatbath hits per heatbath step (default 5)"); + opgroup->add_option("--heatbath-num-or-per-step", heatbath_num_overrelax_per_step, + "Number of overrelaxation hits per heatbath step (default 5)"); + opgroup->add_option("--heatbath-num-steps", heatbath_num_steps, + "Number of measurement steps in heatbath test (default 10)"); + opgroup->add_option("--heatbath-warmup-steps", heatbath_warmup_steps, + "Number of warmup steps in heatbath test (default 10)"); +} + +void add_gaugefix_option_group(std::shared_ptr quda_app) +{ + // Option group for gauge fixing related options + auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests"); + opgroup->add_option("--gf-dir", gf_gauge_dir, "The orthogonal direction of teh gauge fixing, 3=Coulomb, 4=Landau. 
(default 4)"); + opgroup->add_option("--gf-maxiter", gf_maxiter, + "The maximun number of gauge fixing iterations to be applied (default 10000) "); + opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval, + "Print the gauge fixing progress every N steps (default 100)"); + opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost, + "The overrelaxation boost parameter for the overrelaxation method (default 1.5)"); + opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, + "The Alpha parameter in the FFT method (default 0.8)"); + opgroup->add_option("--gf-reunit-interval", gf_reunit_interval, + "Reunitarise the gauge field every N steps (default 10)"); + opgroup->add_option("--gf-tol", gf_tolerance, + "The tolerance of the gauge fixing quality (default 1e-6)"); + opgroup->add_option("--gf-theta-condition", gf_theta_condition, + "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)"); + opgroup->add_option("--gf-fft-autotune", gf_fft_autotune, + "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)"); +} + + void add_comms_option_group(std::shared_ptr quda_app) { auto opgroup diff --git a/tests/utils/command_line_params.h b/tests/utils/command_line_params.h index ce0bcaf718..12f16c2046 100644 --- a/tests/utils/command_line_params.h +++ b/tests/utils/command_line_params.h @@ -133,6 +133,8 @@ void add_deflation_option_group(std::shared_ptr quda_app); void add_multigrid_option_group(std::shared_ptr quda_app); void add_eofa_option_group(std::shared_ptr quda_app); void add_su3_option_group(std::shared_ptr quda_app); +void add_heatbath_option_group(std::shared_ptr quda_app); +void add_gaugefix_option_group(std::shared_ptr quda_app); void add_comms_option_group(std::shared_ptr quda_app); template std::string inline get_string(CLI::TransformPairs &map, T val) @@ -369,6 +371,16 @@ extern int wflow_steps; extern QudaWFlowType wflow_type; extern 
int measurement_interval; +extern int gf_gauge_dir; +extern int gf_maxiter; +extern int gf_verbosity_interval; +extern double gf_ovr_relaxation_boost; +extern double gf_fft_alpha; +extern int gf_reunit_interval; +extern double gf_tolerance; +extern bool gf_theta_condition; +extern bool gf_fft_autotune; + extern QudaContractType contract_type; extern std::array grid_partition; From 0071401c16af2770a7aae0b691f51fec4af0c504 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 30 Apr 2021 18:48:50 -0700 Subject: [PATCH 02/32] Move the gauge alg test to a ctest, make a new interface to the gauge fixing that allows for fine grained control and gauge IO --- lib/interface_quda.cpp | 87 ++++---- tests/CMakeLists.txt | 4 + tests/gauge_alg_ctest.cpp | 274 ++++++++++++++++++++++++ tests/gauge_alg_test.cpp | 428 +++++++++++++++++++------------------- tests/su3_test.cpp | 35 ++-- 5 files changed, 543 insertions(+), 285 deletions(-) create mode 100644 tests/gauge_alg_ctest.cpp diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 3af70859d8..6638a32046 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -236,8 +236,8 @@ static TimeProfile profileMomAction("momActionQuda"); static TimeProfile profileEnd("endQuda"); //!< Profiler for GaugeFixing -static TimeProfile GaugeFixFFTQuda("GaugeFixFFTQuda"); -static TimeProfile GaugeFixOVRQuda("GaugeFixOVRQuda"); +static TimeProfile profileGaugeFixFFT("gaugeFixFFTQuda"); +static TimeProfile profileGaugeFixOVR("gaugeFixOVRQuda"); //!< Profiler for toal time spend between init and end static TimeProfile profileInit2End("initQuda-endQuda",false); @@ -1535,6 +1535,8 @@ void endQuda(void) profileProject.Print(); profilePhase.Print(); profileMomAction.Print(); + profileGaugeFixOVR.Print(); + profileGaugeFixFFT.Print(); profileEnd.Print(); profileInit2End.Print(); @@ -5809,11 +5811,11 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u const unsigned int reunit_interval, const unsigned 
int stopWtheta, QudaGaugeParam *param, double *timeinfo) { - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_TOTAL); - + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL); + checkGaugeParam(param); - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_INIT); + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT); GaugeFieldParam gParam(gauge, *param); auto *cpuGauge = new cpuGaugeField(gParam); @@ -5824,44 +5826,37 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u gParam.setPrecision(gParam.Precision(), true); auto *cudaInGauge = new cudaGaugeField(gParam); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_INIT); - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_H2D); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_INIT); + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_H2D); - ///if (!param->use_resident_gauge) { // load fields onto the device cudaInGauge->loadCPUField(*cpuGauge); - /* } else { // or use resident fields already present - if (!gaugePrecise) errorQuda("No resident gauge field allocated"); - cudaInGauge = gaugePrecise; - gaugePrecise = nullptr; - } */ - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_H2D); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_H2D); if (comm_size() == 1) { // perform the update - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE); + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE); } else { - cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, GaugeFixOVRQuda); + cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFixOVR); - // perform the update - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE); + // Perform the update + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); - 
GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE); - //HOW TO COPY BACK TO CPU: cudaInGaugeEx->cpuGauge copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); } - - // copy the gauge field back to the host - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_D2H); + + // Copy the gauge field back to the host + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_D2H); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_D2H); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_TOTAL); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_TOTAL); if (param->make_resident_gauge) { if (gaugePrecise != nullptr) delete gaugePrecise; @@ -5871,9 +5866,9 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u } if(timeinfo){ - timeinfo[0] = GaugeFixOVRQuda.Last(QUDA_PROFILE_H2D); - timeinfo[1] = GaugeFixOVRQuda.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = GaugeFixOVRQuda.Last(QUDA_PROFILE_D2H); + timeinfo[0] = profileGaugeFixOVR.Last(QUDA_PROFILE_H2D); + timeinfo[1] = profileGaugeFixOVR.Last(QUDA_PROFILE_COMPUTE); + timeinfo[2] = profileGaugeFixOVR.Last(QUDA_PROFILE_D2H); } return 0; @@ -5883,11 +5878,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \ const unsigned int stopWtheta, QudaGaugeParam* param , double* timeinfo) { - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_TOTAL); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_TOTAL); checkGaugeParam(param); - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_INIT); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_INIT); GaugeFieldParam gParam(gauge, *param); auto *cpuGauge = new cpuGaugeField(gParam); @@ -5900,33 +5895,27 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const auto *cudaInGauge = new cudaGaugeField(gParam); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_INIT); + 
profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT); - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_H2D); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D); - //if (!param->use_resident_gauge) { // load fields onto the device cudaInGauge->loadCPUField(*cpuGauge); - /*} else { // or use resident fields already present - if (!gaugePrecise) errorQuda("No resident gauge field allocated"); - cudaInGauge = gaugePrecise; - gaugePrecise = nullptr; - } */ - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_H2D); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_H2D); // perform the update - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_COMPUTE); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE); // copy the gauge field back to the host - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_D2H); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_D2H); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_D2H); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_TOTAL); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_TOTAL); if (param->make_resident_gauge) { if (gaugePrecise != nullptr) delete gaugePrecise; @@ -5934,11 +5923,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const } else { delete cudaInGauge; } - + if (timeinfo) { - timeinfo[0] = GaugeFixFFTQuda.Last(QUDA_PROFILE_H2D); - timeinfo[1] = GaugeFixFFTQuda.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = GaugeFixFFTQuda.Last(QUDA_PROFILE_D2H); + timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D); + timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE); + timeinfo[2] = profileGaugeFixFFT.Last(QUDA_PROFILE_D2H); } return 0; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7c357e67ac..ed9e9df62f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -239,6 +239,10 @@ 
if(QUDA_GAUGE_ALG) add_executable(gauge_alg_test gauge_alg_test.cpp) target_link_libraries(gauge_alg_test ${TEST_LIBS}) quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS) + + add_executable(gauge_alg_ctest gauge_alg_ctest.cpp) + target_link_libraries(gauge_alg_ctest ${TEST_LIBS}) + quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS) install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) add_executable(heatbath_test heatbath_test.cpp) diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp new file mode 100644 index 0000000000..410d2304bd --- /dev/null +++ b/tests/gauge_alg_ctest.cpp @@ -0,0 +1,274 @@ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +using namespace quda; + +class GaugeAlgTest : public ::testing::Test +{ + protected: + + QudaGaugeParam param; + + Timer a0,a1; + double2 detu; + double3 plaq; + cudaGaugeField *U; + int nsteps; + int nhbsteps; + int novrsteps; + bool coldstart; + double beta_value; + + RNG * randstates; + + + void SetReunitarizationConsts(){ + const double unitarize_eps = 1e-14; + const double max_error = 1e-10; + const int reunit_allow_svd = 1; + const int reunit_svd_only = 0; + const double svd_rel_error = 1e-6; + const double svd_abs_error = 1e-6; + setUnitarizeLinksConstants(unitarize_eps, max_error, + reunit_allow_svd, reunit_svd_only, + svd_rel_error, svd_abs_error); + + } + + bool checkDimsPartitioned() + { + if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) + return true; + return false; + } + + bool comparePlaquette(double3 a, double3 b){ + double a0,a1,a2; + a0 = std::abs(a.x - b.x); + a1 = std::abs(a.y - b.y); + a2 = std::abs(a.z - b.z); + double prec_val = 1.0e-5; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return 
true; + return false; + } + + bool CheckDeterminant(double2 detu){ + double prec_val = 5e-8; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true; + return false; + } + + virtual void SetUp() { + setVerbosity(QUDA_VERBOSE); + + param = newQudaGaugeParam(); + + // Setup gauge container. + param.cpu_prec = prec; + param.cpu_prec = prec; + param.cuda_prec = prec; + param.reconstruct = link_recon; + param.cuda_prec_sloppy = prec; + param.reconstruct_sloppy = link_recon; + + param.type = QUDA_WILSON_LINKS; + param.gauge_order = QUDA_MILC_GAUGE_ORDER; + + param.X[0] = xdim; + param.X[1] = ydim; + param.X[2] = zdim; + param.X[3] = tdim; + setDims(param.X); + + param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! + param.t_boundary = QUDA_PERIODIC_T; + param.gauge_fix = QUDA_GAUGE_FIXED_NO; + param.ga_pad = 0; + + GaugeFieldParam gParam(0, param); + gParam.pad = 0; + gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; + gParam.reconstruct = param.reconstruct; + gParam.setPrecision(gParam.Precision(), true); + +#ifdef MULTI_GPU + int y[4]; + int R[4] = {0,0,0,0}; + for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; + for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int pad = 0; + GaugeFieldParam gParamEx(y, prec, link_recon, + pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + gParamEx.create = QUDA_ZERO_FIELD_CREATE; + gParamEx.order = gParam.order; + gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; + gParamEx.t_boundary = gParam.t_boundary; + gParamEx.nFace = 1; + for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; + U = new cudaGaugeField(gParamEx); +#else + U = new cudaGaugeField(gParam); +#endif + // CURAND random generator initialization + randstates = new RNG(gParam, 1234); + randstates->Init(); + + nsteps = heatbath_num_steps; + nhbsteps = 
heatbath_num_heatbath_per_step; + novrsteps = heatbath_num_overrelax_per_step; + coldstart = heatbath_coldstart; + beta_value = heatbath_beta_value; + + a0.Start(__func__, __FILE__, __LINE__); + a1.Start(__func__, __FILE__, __LINE__); + + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); + int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); + + if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) + InitGaugeField(*U); + else + InitGaugeField(*U, *randstates); + + // Reunitarization setup + SetReunitarizationConsts(); + plaquette(*U); + + for(int step=1; step<=nsteps; ++step){ + printfQuda("Step %d\n",step); + Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); + + //Reunitarize gauge links... + *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + + plaquette(*U); + } + a1.Stop(__func__, __FILE__, __LINE__); + + printfQuda("Time Monte -> %.6f s\n", a1.Last()); + plaq = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + + host_free(num_failures_h); + } + + virtual void TearDown() { + detu = getLinkDeterminant(*U); + double2 tru = getLinkTrace(*U); + printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); + printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0); + + delete U; + //Release all temporary memory used for data exchange between GPUs in multi-GPU mode + PGaugeExchangeFree(); + + a0.Stop(__func__, __FILE__, __LINE__); + printfQuda("Time -> %.6f s\n", a0.Last()); + randstates->Release(); + delete randstates; + } +}; + +TEST_F(GaugeAlgTest, Generation) +{ + detu = getLinkDeterminant(*U); + ASSERT_TRUE(CheckDeterminant(detu)); +} + +TEST_F(GaugeAlgTest, Landau_Overrelaxation) +{ + printfQuda("Landau gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + auto plaq_gf = 
plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); +} + +TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) +{ + printfQuda("Coulomb gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); +} + +TEST_F(GaugeAlgTest, Landau_FFT) +{ + if (!checkDimsPartitioned()) { + printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } +} + +TEST_F(GaugeAlgTest, Coulomb_FFT) +{ + if (!checkDimsPartitioned()) { + printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } +} + +int main(int argc, char **argv) +{ + // initalize google test, includes command line options + ::testing::InitGoogleTest(&argc, argv); + // return code for google test + int test_rc = 0; + xdim=ydim=zdim=tdim=32; + + // command line options + auto app = make_app(); + add_gaugefix_option_group(app); + add_heatbath_option_group(app); + try { + app->parse(argc, argv); + } catch (const CLI::ParseError &e) { + return app->exit(e); + } + + initComms(argc, argv, gridsize_from_cmdline); + + // Ensure gtest prints only from rank 0 + ::testing::TestEventListeners 
&listeners = ::testing::UnitTest::GetInstance()->listeners(); + if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } + + initQuda(device_ordinal); + test_rc = RUN_ALL_TESTS(); + endQuda(); + + finalizeComms(); + + return test_rc; +} diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index 410d2304bd..5579460ab7 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -21,254 +22,247 @@ using namespace quda; -class GaugeAlgTest : public ::testing::Test -{ - protected: - - QudaGaugeParam param; +void display_test_info() +{ + printfQuda("running the following test:\n"); - Timer a0,a1; - double2 detu; - double3 plaq; - cudaGaugeField *U; - int nsteps; - int nhbsteps; - int novrsteps; - bool coldstart; - double beta_value; + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); + printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), + get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, + tdim, Lsdim); + + printfQuda("Grid partition info: X Y Z T\n"); + printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), + dimPartitioned(3)); +} - RNG * randstates; +void SetReunitarizationConsts(){ + const double unitarize_eps = 1e-14; + const double max_error = 1e-10; + const int reunit_allow_svd = 1; + const int reunit_svd_only = 0; + const double svd_rel_error = 1e-6; + const double svd_abs_error = 1e-6; + setUnitarizeLinksConstants(unitarize_eps, max_error, + reunit_allow_svd, reunit_svd_only, + svd_rel_error, svd_abs_error); - void SetReunitarizationConsts(){ - const double unitarize_eps = 1e-14; - const double max_error = 1e-10; - const int reunit_allow_svd = 1; - const int reunit_svd_only = 0; - const double svd_rel_error = 1e-6; - const double svd_abs_error = 1e-6; - 
setUnitarizeLinksConstants(unitarize_eps, max_error, - reunit_allow_svd, reunit_svd_only, - svd_rel_error, svd_abs_error); - - } - - bool checkDimsPartitioned() - { - if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) - return true; - return false; - } - - bool comparePlaquette(double3 a, double3 b){ - double a0,a1,a2; - a0 = std::abs(a.x - b.x); - a1 = std::abs(a.y - b.y); - a2 = std::abs(a.z - b.z); - double prec_val = 1.0e-5; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true; - return false; - } - - bool CheckDeterminant(double2 detu){ - double prec_val = 5e-8; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true; - return false; - } - - virtual void SetUp() { - setVerbosity(QUDA_VERBOSE); - - param = newQudaGaugeParam(); - - // Setup gauge container. - param.cpu_prec = prec; - param.cpu_prec = prec; - param.cuda_prec = prec; - param.reconstruct = link_recon; - param.cuda_prec_sloppy = prec; - param.reconstruct_sloppy = link_recon; - - param.type = QUDA_WILSON_LINKS; - param.gauge_order = QUDA_MILC_GAUGE_ORDER; - - param.X[0] = xdim; - param.X[1] = ydim; - param.X[2] = zdim; - param.X[3] = tdim; - setDims(param.X); - - param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! 
- param.t_boundary = QUDA_PERIODIC_T; - param.gauge_fix = QUDA_GAUGE_FIXED_NO; - param.ga_pad = 0; - - GaugeFieldParam gParam(0, param); - gParam.pad = 0; - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; - gParam.reconstruct = param.reconstruct; - gParam.setPrecision(gParam.Precision(), true); - -#ifdef MULTI_GPU - int y[4]; - int R[4] = {0,0,0,0}; - for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; - for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; - int pad = 0; - GaugeFieldParam gParamEx(y, prec, link_recon, - pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); - gParamEx.create = QUDA_ZERO_FIELD_CREATE; - gParamEx.order = gParam.order; - gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; - gParamEx.t_boundary = gParam.t_boundary; - gParamEx.nFace = 1; - for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; - U = new cudaGaugeField(gParamEx); -#else - U = new cudaGaugeField(gParam); -#endif - // CURAND random generator initialization - randstates = new RNG(gParam, 1234); - randstates->Init(); - - nsteps = heatbath_num_steps; - nhbsteps = heatbath_num_heatbath_per_step; - novrsteps = heatbath_num_overrelax_per_step; - coldstart = heatbath_coldstart; - beta_value = heatbath_beta_value; - - a0.Start(__func__, __FILE__, __LINE__); - a1.Start(__func__, __FILE__, __LINE__); - - int *num_failures_h = (int *)mapped_malloc(sizeof(int)); - int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); - - if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) - InitGaugeField(*U); - else - InitGaugeField(*U, *randstates); - - // Reunitarization setup - SetReunitarizationConsts(); - plaquette(*U); - - for(int step=1; step<=nsteps; ++step){ - printfQuda("Step %d\n",step); - Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - - //Reunitarize gauge links... 
- *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); - - plaquette(*U); - } - a1.Stop(__func__, __FILE__, __LINE__); - - printfQuda("Time Monte -> %.6f s\n", a1.Last()); - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - - host_free(num_failures_h); - } - - virtual void TearDown() { - detu = getLinkDeterminant(*U); - double2 tru = getLinkTrace(*U); - printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); - printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0); - - delete U; - //Release all temporary memory used for data exchange between GPUs in multi-GPU mode - PGaugeExchangeFree(); - - a0.Stop(__func__, __FILE__, __LINE__); - printfQuda("Time -> %.6f s\n", a0.Last()); - randstates->Release(); - delete randstates; - } -}; - -TEST_F(GaugeAlgTest, Generation) -{ - detu = getLinkDeterminant(*U); - ASSERT_TRUE(CheckDeterminant(detu)); -} - -TEST_F(GaugeAlgTest, Landau_Overrelaxation) -{ - printfQuda("Landau gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); } -TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) +bool checkDimsPartitioned() { - printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) + return true; + return false; } -TEST_F(GaugeAlgTest, 
Landau_FFT) +bool comparePlaquette(double3 a, double3 b) { - if (!checkDimsPartitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - } + printfQuda("Plaq: %.16e, %.16e, %.16e\n", a.x, a.y, a.z); + printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); + double a0,a1,a2; + a0 = std::abs(a.x - b.x); + a1 = std::abs(a.y - b.y); + a2 = std::abs(a.z - b.z); + double prec_val = 1.0e-5; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } -TEST_F(GaugeAlgTest, Coulomb_FFT) +bool checkDeterminant(double2 detu) { - if (!checkDimsPartitioned()) { - printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - } + printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y); + double prec_val = 5e-8; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val; } int main(int argc, char **argv) { - // initalize google test, includes command line options - ::testing::InitGoogleTest(&argc, argv); - // return code for google test - int test_rc = 0; - xdim=ydim=zdim=tdim=32; - // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); + CLI::TransformPairs test_type_map {{"OVR", 0}, {"FFT", 1}}; + app->add_option("--test", test_type, "Test 
method")->transform(CLI::CheckedTransformer(test_type_map)); try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { return app->exit(e); } + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); - // Ensure gtest prints only from rank 0 - ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); - if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } + // call srand() with a rank-dependent seed + initRand(); + + display_test_info(); + // initialize the QUDA library initQuda(device_ordinal); - test_rc = RUN_ALL_TESTS(); - endQuda(); - finalizeComms(); + // *** QUDA parameters begin here. + setVerbosity(QUDA_VERBOSE); + QudaGaugeParam param = newQudaGaugeParam(); - return test_rc; + double3 plaq; + cudaGaugeField *U; + int nsteps = heatbath_num_steps; + int nhbsteps = heatbath_num_heatbath_per_step; + int novrsteps = heatbath_num_overrelax_per_step; + bool coldstart = heatbath_coldstart; + double beta_value = heatbath_beta_value; + + RNG * randstates; + + // Setup gauge container. + param.cpu_prec = prec; + param.cpu_prec = prec; + param.cuda_prec = prec; + param.reconstruct = link_recon; + param.cuda_prec_sloppy = prec; + param.reconstruct_sloppy = link_recon; + + param.type = QUDA_WILSON_LINKS; + param.gauge_order = QUDA_MILC_GAUGE_ORDER; + + param.X[0] = xdim; + param.X[1] = ydim; + param.X[2] = zdim; + param.X[3] = tdim; + setDims(param.X); + + param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! 
+ param.t_boundary = QUDA_PERIODIC_T; + param.gauge_fix = QUDA_GAUGE_FIXED_NO; + param.ga_pad = 0; + + GaugeFieldParam gParam(0, param); + gParam.pad = 0; + gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; + gParam.reconstruct = param.reconstruct; + gParam.setPrecision(gParam.Precision(), true); + + int y[4]; + int R[4] = {0,0,0,0}; + for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; + for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int pad = 0; + GaugeFieldParam gParamEx(y, prec, link_recon, + pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + gParamEx.create = QUDA_ZERO_FIELD_CREATE; + gParamEx.order = gParam.order; + gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; + gParamEx.t_boundary = gParam.t_boundary; + gParamEx.nFace = 1; + for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; + U = new cudaGaugeField(gParamEx); + + // CURAND random generator initialization + randstates = new RNG(gParam, 1234); + randstates->Init(); + + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); + int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); + + if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) + InitGaugeField(*U); + else + InitGaugeField(*U, *randstates); + + // Reunitarization setup + SetReunitarizationConsts(); + plaquette(*U); + + for(int step=1; step<=nsteps; ++step){ + printfQuda("Step %d\n",step); + Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); + + //Reunitarize gauge links... 
+ *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + + plaquette(*U); + } + + plaq = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + + host_free(num_failures_h); + + // Gauge Fixing Routines + //--------------------------------------------------------------------------- + switch (test_type) { + case 0: + printfQuda("%s gauge fixing with overrelaxation\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); + gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + comparePlaquette(plaq, plaquette(*U)); + break; + + case 1: + if (!checkDimsPartitioned()) { + printfQuda("%s gauge fixing with steepest descent method with FFTs\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); + gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + comparePlaquette(plaq, plaquette(*U)); + } else { + errorQuda("FFT gauge fixing not supported for multi GPU geometry"); + } + break; + + default: + errorQuda("Unknown test type %d", test_type); + } + + double2 link_trace = getLinkTrace(*U); + printfQuda("Tr: %.16e:%.16e\n", link_trace.x/3.0, link_trace.y/3.0); + + // Save if output string is specified + if (strcmp(gauge_outfile,"")) { + + printfQuda("Saving the gauge field to file %s\n", gauge_outfile); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); + setWilsonGaugeParam(gauge_param); + + void *cpu_gauge[4]; + for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } + + cudaGaugeField *gauge; + gauge = new cudaGaugeField(gParam); + + // copy into regular field + copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); + saveGaugeFieldQuda((void*)cpu_gauge, (void*)gauge, &gauge_param); + + // Write to disk + 
write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char**)0); + + for (int dir = 0; dir<4; dir++) free(cpu_gauge[dir]); + delete gauge; + } else { + printfQuda("No output file specified.\n"); + } + + delete U; + + //Release all temporary memory used for data exchange between GPUs in multi-GPU mode + PGaugeExchangeFree(); + + randstates->Release(); + delete randstates; + + freeGaugeQuda(); + endQuda(); + finalizeComms(); + + return 0; } diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp index 86827d3096..1c0b75c3a8 100644 --- a/tests/su3_test.cpp +++ b/tests/su3_test.cpp @@ -114,34 +114,34 @@ int main(int argc, char **argv) // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); - QudaGaugeParam gauge_param = newQudaGaugeParam(); - if (prec_sloppy == QUDA_INVALID_PRECISION) - prec_sloppy = prec; - if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) - link_recon_sloppy = link_recon; + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + initQuda(device_ordinal); + setVerbosity(verbosity); + + // call srand() with a rank-dependent seed + initRand(); + QudaGaugeParam gauge_param = newQudaGaugeParam(); setGaugeParam(gauge_param); setDims(gauge_param.X); - + + // All user inputs now defined + display_test_info(); + + // *** QUDA parameters begin here. 
void *gauge[4], *new_gauge[4]; - for (int dir = 0; dir < 4; dir++) { gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size); new_gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size); } - - initQuda(device_ordinal); - - setVerbosity(verbosity); - - // call srand() with a rank-dependent seed - initRand(); - + constructHostGaugeField(gauge, gauge_param, argc, argv); // Load the gauge field to the device loadGaugeQuda((void *)gauge, &gauge_param); saveGaugeQuda(new_gauge, &gauge_param); - + double plaq[3]; plaqQuda(plaq); printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1], @@ -149,9 +149,6 @@ int main(int argc, char **argv) #ifdef GPU_GAUGE_TOOLS - // All user inputs now defined - display_test_info(); - // Topological charge and gauge energy double q_charge_check = 0.0; // Size of floating point data From d9bbbfa594b51645e3058ea009ea73cab12d70e1 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 30 Apr 2021 18:48:50 -0700 Subject: [PATCH 03/32] Move the gauge_alg_test to a ctest, make a new interface to the gauge fixing that allows for fine grained control and gauge IO. 
--- lib/interface_quda.cpp | 87 ++++---- tests/CMakeLists.txt | 4 + tests/gauge_alg_ctest.cpp | 274 ++++++++++++++++++++++++ tests/gauge_alg_test.cpp | 428 +++++++++++++++++++------------------- tests/su3_test.cpp | 35 ++-- 5 files changed, 543 insertions(+), 285 deletions(-) create mode 100644 tests/gauge_alg_ctest.cpp diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 3af70859d8..6638a32046 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -236,8 +236,8 @@ static TimeProfile profileMomAction("momActionQuda"); static TimeProfile profileEnd("endQuda"); //!< Profiler for GaugeFixing -static TimeProfile GaugeFixFFTQuda("GaugeFixFFTQuda"); -static TimeProfile GaugeFixOVRQuda("GaugeFixOVRQuda"); +static TimeProfile profileGaugeFixFFT("gaugeFixFFTQuda"); +static TimeProfile profileGaugeFixOVR("gaugeFixOVRQuda"); //!< Profiler for toal time spend between init and end static TimeProfile profileInit2End("initQuda-endQuda",false); @@ -1535,6 +1535,8 @@ void endQuda(void) profileProject.Print(); profilePhase.Print(); profileMomAction.Print(); + profileGaugeFixOVR.Print(); + profileGaugeFixFFT.Print(); profileEnd.Print(); profileInit2End.Print(); @@ -5809,11 +5811,11 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo) { - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_TOTAL); - + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL); + checkGaugeParam(param); - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_INIT); + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT); GaugeFieldParam gParam(gauge, *param); auto *cpuGauge = new cpuGaugeField(gParam); @@ -5824,44 +5826,37 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u gParam.setPrecision(gParam.Precision(), true); auto *cudaInGauge = new cudaGaugeField(gParam); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_INIT); - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_H2D); + 
profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_INIT); + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_H2D); - ///if (!param->use_resident_gauge) { // load fields onto the device cudaInGauge->loadCPUField(*cpuGauge); - /* } else { // or use resident fields already present - if (!gaugePrecise) errorQuda("No resident gauge field allocated"); - cudaInGauge = gaugePrecise; - gaugePrecise = nullptr; - } */ - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_H2D); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_H2D); if (comm_size() == 1) { // perform the update - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE); + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE); } else { - cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, GaugeFixOVRQuda); + cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFixOVR); - // perform the update - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE); + // Perform the update + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE); - //HOW TO COPY BACK TO CPU: cudaInGaugeEx->cpuGauge copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); } - - // copy the gauge field back to the host - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_D2H); + + // Copy the gauge field back to the host + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_D2H); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_D2H); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_TOTAL); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_TOTAL); if (param->make_resident_gauge) { if (gaugePrecise != 
nullptr) delete gaugePrecise; @@ -5871,9 +5866,9 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u } if(timeinfo){ - timeinfo[0] = GaugeFixOVRQuda.Last(QUDA_PROFILE_H2D); - timeinfo[1] = GaugeFixOVRQuda.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = GaugeFixOVRQuda.Last(QUDA_PROFILE_D2H); + timeinfo[0] = profileGaugeFixOVR.Last(QUDA_PROFILE_H2D); + timeinfo[1] = profileGaugeFixOVR.Last(QUDA_PROFILE_COMPUTE); + timeinfo[2] = profileGaugeFixOVR.Last(QUDA_PROFILE_D2H); } return 0; @@ -5883,11 +5878,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \ const unsigned int stopWtheta, QudaGaugeParam* param , double* timeinfo) { - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_TOTAL); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_TOTAL); checkGaugeParam(param); - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_INIT); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_INIT); GaugeFieldParam gParam(gauge, *param); auto *cpuGauge = new cpuGaugeField(gParam); @@ -5900,33 +5895,27 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const auto *cudaInGauge = new cudaGaugeField(gParam); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_INIT); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT); - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_H2D); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D); - //if (!param->use_resident_gauge) { // load fields onto the device cudaInGauge->loadCPUField(*cpuGauge); - /*} else { // or use resident fields already present - if (!gaugePrecise) errorQuda("No resident gauge field allocated"); - cudaInGauge = gaugePrecise; - gaugePrecise = nullptr; - } */ - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_H2D); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_H2D); // perform the update - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_COMPUTE); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, 
verbose_interval, alpha, autotune, tolerance, stopWtheta); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE); // copy the gauge field back to the host - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_D2H); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_D2H); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_D2H); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_TOTAL); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_TOTAL); if (param->make_resident_gauge) { if (gaugePrecise != nullptr) delete gaugePrecise; @@ -5934,11 +5923,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const } else { delete cudaInGauge; } - + if (timeinfo) { - timeinfo[0] = GaugeFixFFTQuda.Last(QUDA_PROFILE_H2D); - timeinfo[1] = GaugeFixFFTQuda.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = GaugeFixFFTQuda.Last(QUDA_PROFILE_D2H); + timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D); + timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE); + timeinfo[2] = profileGaugeFixFFT.Last(QUDA_PROFILE_D2H); } return 0; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7c357e67ac..ed9e9df62f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -239,6 +239,10 @@ if(QUDA_GAUGE_ALG) add_executable(gauge_alg_test gauge_alg_test.cpp) target_link_libraries(gauge_alg_test ${TEST_LIBS}) quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS) + + add_executable(gauge_alg_ctest gauge_alg_ctest.cpp) + target_link_libraries(gauge_alg_ctest ${TEST_LIBS}) + quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS) install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) add_executable(heatbath_test heatbath_test.cpp) diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp new file mode 100644 index 0000000000..410d2304bd --- /dev/null +++ b/tests/gauge_alg_ctest.cpp @@ -0,0 +1,274 @@ +#include +#include +#include + 
+#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +using namespace quda; + +class GaugeAlgTest : public ::testing::Test +{ + protected: + + QudaGaugeParam param; + + Timer a0,a1; + double2 detu; + double3 plaq; + cudaGaugeField *U; + int nsteps; + int nhbsteps; + int novrsteps; + bool coldstart; + double beta_value; + + RNG * randstates; + + + void SetReunitarizationConsts(){ + const double unitarize_eps = 1e-14; + const double max_error = 1e-10; + const int reunit_allow_svd = 1; + const int reunit_svd_only = 0; + const double svd_rel_error = 1e-6; + const double svd_abs_error = 1e-6; + setUnitarizeLinksConstants(unitarize_eps, max_error, + reunit_allow_svd, reunit_svd_only, + svd_rel_error, svd_abs_error); + + } + + bool checkDimsPartitioned() + { + if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) + return true; + return false; + } + + bool comparePlaquette(double3 a, double3 b){ + double a0,a1,a2; + a0 = std::abs(a.x - b.x); + a1 = std::abs(a.y - b.y); + a2 = std::abs(a.z - b.z); + double prec_val = 1.0e-5; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true; + return false; + } + + bool CheckDeterminant(double2 detu){ + double prec_val = 5e-8; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true; + return false; + } + + virtual void SetUp() { + setVerbosity(QUDA_VERBOSE); + + param = newQudaGaugeParam(); + + // Setup gauge container. 
+ param.cpu_prec = prec; + param.cpu_prec = prec; + param.cuda_prec = prec; + param.reconstruct = link_recon; + param.cuda_prec_sloppy = prec; + param.reconstruct_sloppy = link_recon; + + param.type = QUDA_WILSON_LINKS; + param.gauge_order = QUDA_MILC_GAUGE_ORDER; + + param.X[0] = xdim; + param.X[1] = ydim; + param.X[2] = zdim; + param.X[3] = tdim; + setDims(param.X); + + param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! + param.t_boundary = QUDA_PERIODIC_T; + param.gauge_fix = QUDA_GAUGE_FIXED_NO; + param.ga_pad = 0; + + GaugeFieldParam gParam(0, param); + gParam.pad = 0; + gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; + gParam.reconstruct = param.reconstruct; + gParam.setPrecision(gParam.Precision(), true); + +#ifdef MULTI_GPU + int y[4]; + int R[4] = {0,0,0,0}; + for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; + for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int pad = 0; + GaugeFieldParam gParamEx(y, prec, link_recon, + pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + gParamEx.create = QUDA_ZERO_FIELD_CREATE; + gParamEx.order = gParam.order; + gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; + gParamEx.t_boundary = gParam.t_boundary; + gParamEx.nFace = 1; + for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; + U = new cudaGaugeField(gParamEx); +#else + U = new cudaGaugeField(gParam); +#endif + // CURAND random generator initialization + randstates = new RNG(gParam, 1234); + randstates->Init(); + + nsteps = heatbath_num_steps; + nhbsteps = heatbath_num_heatbath_per_step; + novrsteps = heatbath_num_overrelax_per_step; + coldstart = heatbath_coldstart; + beta_value = heatbath_beta_value; + + a0.Start(__func__, __FILE__, __LINE__); + a1.Start(__func__, __FILE__, __LINE__); + + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); + int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); + + if (link_recon 
!= QUDA_RECONSTRUCT_8 && coldstart) + InitGaugeField(*U); + else + InitGaugeField(*U, *randstates); + + // Reunitarization setup + SetReunitarizationConsts(); + plaquette(*U); + + for(int step=1; step<=nsteps; ++step){ + printfQuda("Step %d\n",step); + Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); + + //Reunitarize gauge links... + *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + + plaquette(*U); + } + a1.Stop(__func__, __FILE__, __LINE__); + + printfQuda("Time Monte -> %.6f s\n", a1.Last()); + plaq = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + + host_free(num_failures_h); + } + + virtual void TearDown() { + detu = getLinkDeterminant(*U); + double2 tru = getLinkTrace(*U); + printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); + printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0); + + delete U; + //Release all temporary memory used for data exchange between GPUs in multi-GPU mode + PGaugeExchangeFree(); + + a0.Stop(__func__, __FILE__, __LINE__); + printfQuda("Time -> %.6f s\n", a0.Last()); + randstates->Release(); + delete randstates; + } +}; + +TEST_F(GaugeAlgTest, Generation) +{ + detu = getLinkDeterminant(*U); + ASSERT_TRUE(CheckDeterminant(detu)); +} + +TEST_F(GaugeAlgTest, Landau_Overrelaxation) +{ + printfQuda("Landau gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); +} + +TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) +{ + printfQuda("Coulomb gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + auto 
plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); +} + +TEST_F(GaugeAlgTest, Landau_FFT) +{ + if (!checkDimsPartitioned()) { + printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } +} + +TEST_F(GaugeAlgTest, Coulomb_FFT) +{ + if (!checkDimsPartitioned()) { + printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } +} + +int main(int argc, char **argv) +{ + // initalize google test, includes command line options + ::testing::InitGoogleTest(&argc, argv); + // return code for google test + int test_rc = 0; + xdim=ydim=zdim=tdim=32; + + // command line options + auto app = make_app(); + add_gaugefix_option_group(app); + add_heatbath_option_group(app); + try { + app->parse(argc, argv); + } catch (const CLI::ParseError &e) { + return app->exit(e); + } + + initComms(argc, argv, gridsize_from_cmdline); + + // Ensure gtest prints only from rank 0 + ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); + if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } + + initQuda(device_ordinal); + test_rc = RUN_ALL_TESTS(); + endQuda(); + + finalizeComms(); + + return test_rc; +} diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index 410d2304bd..5579460ab7 100644 --- a/tests/gauge_alg_test.cpp 
+++ b/tests/gauge_alg_test.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -21,254 +22,247 @@ using namespace quda; -class GaugeAlgTest : public ::testing::Test -{ - protected: - - QudaGaugeParam param; +void display_test_info() +{ + printfQuda("running the following test:\n"); - Timer a0,a1; - double2 detu; - double3 plaq; - cudaGaugeField *U; - int nsteps; - int nhbsteps; - int novrsteps; - bool coldstart; - double beta_value; + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); + printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), + get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, + tdim, Lsdim); + + printfQuda("Grid partition info: X Y Z T\n"); + printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), + dimPartitioned(3)); +} - RNG * randstates; +void SetReunitarizationConsts(){ + const double unitarize_eps = 1e-14; + const double max_error = 1e-10; + const int reunit_allow_svd = 1; + const int reunit_svd_only = 0; + const double svd_rel_error = 1e-6; + const double svd_abs_error = 1e-6; + setUnitarizeLinksConstants(unitarize_eps, max_error, + reunit_allow_svd, reunit_svd_only, + svd_rel_error, svd_abs_error); - void SetReunitarizationConsts(){ - const double unitarize_eps = 1e-14; - const double max_error = 1e-10; - const int reunit_allow_svd = 1; - const int reunit_svd_only = 0; - const double svd_rel_error = 1e-6; - const double svd_abs_error = 1e-6; - setUnitarizeLinksConstants(unitarize_eps, max_error, - reunit_allow_svd, reunit_svd_only, - svd_rel_error, svd_abs_error); - - } - - bool checkDimsPartitioned() - { - if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) - return true; - return false; - } - - bool comparePlaquette(double3 a, double3 b){ - double a0,a1,a2; - a0 = std::abs(a.x - b.x); - a1 = std::abs(a.y - b.y); - a2 
= std::abs(a.z - b.z); - double prec_val = 1.0e-5; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true; - return false; - } - - bool CheckDeterminant(double2 detu){ - double prec_val = 5e-8; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true; - return false; - } - - virtual void SetUp() { - setVerbosity(QUDA_VERBOSE); - - param = newQudaGaugeParam(); - - // Setup gauge container. - param.cpu_prec = prec; - param.cpu_prec = prec; - param.cuda_prec = prec; - param.reconstruct = link_recon; - param.cuda_prec_sloppy = prec; - param.reconstruct_sloppy = link_recon; - - param.type = QUDA_WILSON_LINKS; - param.gauge_order = QUDA_MILC_GAUGE_ORDER; - - param.X[0] = xdim; - param.X[1] = ydim; - param.X[2] = zdim; - param.X[3] = tdim; - setDims(param.X); - - param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! - param.t_boundary = QUDA_PERIODIC_T; - param.gauge_fix = QUDA_GAUGE_FIXED_NO; - param.ga_pad = 0; - - GaugeFieldParam gParam(0, param); - gParam.pad = 0; - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; - gParam.reconstruct = param.reconstruct; - gParam.setPrecision(gParam.Precision(), true); - -#ifdef MULTI_GPU - int y[4]; - int R[4] = {0,0,0,0}; - for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; - for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; - int pad = 0; - GaugeFieldParam gParamEx(y, prec, link_recon, - pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); - gParamEx.create = QUDA_ZERO_FIELD_CREATE; - gParamEx.order = gParam.order; - gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; - gParamEx.t_boundary = gParam.t_boundary; - gParamEx.nFace = 1; - for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; - U = new cudaGaugeField(gParamEx); -#else - U = new 
cudaGaugeField(gParam); -#endif - // CURAND random generator initialization - randstates = new RNG(gParam, 1234); - randstates->Init(); - - nsteps = heatbath_num_steps; - nhbsteps = heatbath_num_heatbath_per_step; - novrsteps = heatbath_num_overrelax_per_step; - coldstart = heatbath_coldstart; - beta_value = heatbath_beta_value; - - a0.Start(__func__, __FILE__, __LINE__); - a1.Start(__func__, __FILE__, __LINE__); - - int *num_failures_h = (int *)mapped_malloc(sizeof(int)); - int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); - - if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) - InitGaugeField(*U); - else - InitGaugeField(*U, *randstates); - - // Reunitarization setup - SetReunitarizationConsts(); - plaquette(*U); - - for(int step=1; step<=nsteps; ++step){ - printfQuda("Step %d\n",step); - Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - - //Reunitarize gauge links... - *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); - - plaquette(*U); - } - a1.Stop(__func__, __FILE__, __LINE__); - - printfQuda("Time Monte -> %.6f s\n", a1.Last()); - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - - host_free(num_failures_h); - } - - virtual void TearDown() { - detu = getLinkDeterminant(*U); - double2 tru = getLinkTrace(*U); - printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); - printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0); - - delete U; - //Release all temporary memory used for data exchange between GPUs in multi-GPU mode - PGaugeExchangeFree(); - - a0.Stop(__func__, __FILE__, __LINE__); - printfQuda("Time -> %.6f s\n", a0.Last()); - randstates->Release(); - delete randstates; - } -}; - -TEST_F(GaugeAlgTest, Generation) -{ - detu = getLinkDeterminant(*U); - ASSERT_TRUE(CheckDeterminant(detu)); -} - -TEST_F(GaugeAlgTest, Landau_Overrelaxation) -{ - printfQuda("Landau gauge fixing with 
overrelaxation\n"); - gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); } -TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) +bool checkDimsPartitioned() { - printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) + return true; + return false; } -TEST_F(GaugeAlgTest, Landau_FFT) +bool comparePlaquette(double3 a, double3 b) { - if (!checkDimsPartitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - } + printfQuda("Plaq: %.16e, %.16e, %.16e\n", a.x, a.y, a.z); + printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); + double a0,a1,a2; + a0 = std::abs(a.x - b.x); + a1 = std::abs(a.y - b.y); + a2 = std::abs(a.z - b.z); + double prec_val = 1.0e-5; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } -TEST_F(GaugeAlgTest, Coulomb_FFT) +bool checkDeterminant(double2 detu) { - if (!checkDimsPartitioned()) { - printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, 
gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - } + printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y); + double prec_val = 5e-8; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val; } int main(int argc, char **argv) { - // initalize google test, includes command line options - ::testing::InitGoogleTest(&argc, argv); - // return code for google test - int test_rc = 0; - xdim=ydim=zdim=tdim=32; - // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); + CLI::TransformPairs test_type_map {{"OVR", 0}, {"FFT", 1}}; + app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { return app->exit(e); } + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); - // Ensure gtest prints only from rank 0 - ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); - if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } + // call srand() with a rank-dependent seed + initRand(); + + display_test_info(); + // initialize the QUDA library initQuda(device_ordinal); - test_rc = RUN_ALL_TESTS(); - endQuda(); - finalizeComms(); + // *** QUDA parameters begin here. 
+ setVerbosity(QUDA_VERBOSE); + QudaGaugeParam param = newQudaGaugeParam(); - return test_rc; + double3 plaq; + cudaGaugeField *U; + int nsteps = heatbath_num_steps; + int nhbsteps = heatbath_num_heatbath_per_step; + int novrsteps = heatbath_num_overrelax_per_step; + bool coldstart = heatbath_coldstart; + double beta_value = heatbath_beta_value; + + RNG * randstates; + + // Setup gauge container. + param.cpu_prec = prec; + param.cpu_prec = prec; + param.cuda_prec = prec; + param.reconstruct = link_recon; + param.cuda_prec_sloppy = prec; + param.reconstruct_sloppy = link_recon; + + param.type = QUDA_WILSON_LINKS; + param.gauge_order = QUDA_MILC_GAUGE_ORDER; + + param.X[0] = xdim; + param.X[1] = ydim; + param.X[2] = zdim; + param.X[3] = tdim; + setDims(param.X); + + param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! + param.t_boundary = QUDA_PERIODIC_T; + param.gauge_fix = QUDA_GAUGE_FIXED_NO; + param.ga_pad = 0; + + GaugeFieldParam gParam(0, param); + gParam.pad = 0; + gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; + gParam.reconstruct = param.reconstruct; + gParam.setPrecision(gParam.Precision(), true); + + int y[4]; + int R[4] = {0,0,0,0}; + for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; + for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int pad = 0; + GaugeFieldParam gParamEx(y, prec, link_recon, + pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + gParamEx.create = QUDA_ZERO_FIELD_CREATE; + gParamEx.order = gParam.order; + gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; + gParamEx.t_boundary = gParam.t_boundary; + gParamEx.nFace = 1; + for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; + U = new cudaGaugeField(gParamEx); + + // CURAND random generator initialization + randstates = new RNG(gParam, 1234); + randstates->Init(); + + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); + int *num_failures_d = (int 
*)get_mapped_device_pointer(num_failures_h); + + if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) + InitGaugeField(*U); + else + InitGaugeField(*U, *randstates); + + // Reunitarization setup + SetReunitarizationConsts(); + plaquette(*U); + + for(int step=1; step<=nsteps; ++step){ + printfQuda("Step %d\n",step); + Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); + + //Reunitarize gauge links... + *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + + plaquette(*U); + } + + plaq = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + + host_free(num_failures_h); + + // Gauge Fixing Routines + //--------------------------------------------------------------------------- + switch (test_type) { + case 0: + printfQuda("%s gauge fixing with overrelaxation\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); + gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + comparePlaquette(plaq, plaquette(*U)); + break; + + case 1: + if (!checkDimsPartitioned()) { + printfQuda("%s gauge fixing with steepest descent method with FFTs\n", gf_gauge_dir == 4 ? 
"Landau" : "Coulomb"); + gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + comparePlaquette(plaq, plaquette(*U)); + } else { + errorQuda("FFT gauge fixing not supported for multi GPU geometry"); + } + break; + + default: + errorQuda("Unknown test type %d", test_type); + } + + double2 link_trace = getLinkTrace(*U); + printfQuda("Tr: %.16e:%.16e\n", link_trace.x/3.0, link_trace.y/3.0); + + // Save if output string is specified + if (strcmp(gauge_outfile,"")) { + + printfQuda("Saving the gauge field to file %s\n", gauge_outfile); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); + setWilsonGaugeParam(gauge_param); + + void *cpu_gauge[4]; + for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } + + cudaGaugeField *gauge; + gauge = new cudaGaugeField(gParam); + + // copy into regular field + copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); + saveGaugeFieldQuda((void*)cpu_gauge, (void*)gauge, &gauge_param); + + // Write to disk + write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char**)0); + + for (int dir = 0; dir<4; dir++) free(cpu_gauge[dir]); + delete gauge; + } else { + printfQuda("No output file specified.\n"); + } + + delete U; + + //Release all temporary memory used for data exchange between GPUs in multi-GPU mode + PGaugeExchangeFree(); + + randstates->Release(); + delete randstates; + + freeGaugeQuda(); + endQuda(); + finalizeComms(); + + return 0; } diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp index 86827d3096..1c0b75c3a8 100644 --- a/tests/su3_test.cpp +++ b/tests/su3_test.cpp @@ -114,34 +114,34 @@ int main(int argc, char **argv) // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); - QudaGaugeParam gauge_param = newQudaGaugeParam(); - if (prec_sloppy == QUDA_INVALID_PRECISION) - prec_sloppy = prec; - if 
(link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) - link_recon_sloppy = link_recon; + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + initQuda(device_ordinal); + setVerbosity(verbosity); + + // call srand() with a rank-dependent seed + initRand(); + QudaGaugeParam gauge_param = newQudaGaugeParam(); setGaugeParam(gauge_param); setDims(gauge_param.X); - + + // All user inputs now defined + display_test_info(); + + // *** QUDA parameters begin here. void *gauge[4], *new_gauge[4]; - for (int dir = 0; dir < 4; dir++) { gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size); new_gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size); } - - initQuda(device_ordinal); - - setVerbosity(verbosity); - - // call srand() with a rank-dependent seed - initRand(); - + constructHostGaugeField(gauge, gauge_param, argc, argv); // Load the gauge field to the device loadGaugeQuda((void *)gauge, &gauge_param); saveGaugeQuda(new_gauge, &gauge_param); - + double plaq[3]; plaqQuda(plaq); printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1], @@ -149,9 +149,6 @@ int main(int argc, char **argv) #ifdef GPU_GAUGE_TOOLS - // All user inputs now defined - display_test_info(); - // Topological charge and gauge energy double q_charge_check = 0.0; // Size of floating point data From 9aa3aca72682e24716248722041cf3dc67ee77c6 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 30 Apr 2021 18:54:13 -0700 Subject: [PATCH 04/32] clang tidy --- lib/interface_quda.cpp | 7 +- tests/gauge_alg_ctest.cpp | 80 +++++++++-------- tests/gauge_alg_test.cpp | 130 ++++++++++++++-------------- tests/heatbath_test.cpp | 2 +- tests/su3_test.cpp | 12 +-- tests/utils/command_line_params.cpp | 38 ++++---- 6 files changed, 135 insertions(+), 134 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 
6638a32046..f89d899496 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5812,7 +5812,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u double *timeinfo) { profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL); - + checkGaugeParam(param); profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT); @@ -5850,7 +5850,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); } - + // Copy the gauge field back to the host profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); @@ -5894,7 +5894,6 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const gParam.setPrecision(gParam.Precision(), true); auto *cudaInGauge = new cudaGaugeField(gParam); - profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT); profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D); @@ -5923,7 +5922,7 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const } else { delete cudaInGauge; } - + if (timeinfo) { timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D); timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE); diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index 410d2304bd..cdaa2efd9a 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -22,12 +22,11 @@ using namespace quda; class GaugeAlgTest : public ::testing::Test -{ - protected: - +{ +protected: QudaGaugeParam param; - - Timer a0,a1; + + Timer a0, a1; double2 detu; double3 plaq; cudaGaugeField *U; @@ -37,20 +36,17 @@ class GaugeAlgTest : public ::testing::Test bool coldstart; double beta_value; - RNG * randstates; + RNG *randstates; - - void SetReunitarizationConsts(){ + void SetReunitarizationConsts() + { const double unitarize_eps = 1e-14; const double max_error = 1e-10; const int reunit_allow_svd = 1; - const int reunit_svd_only = 0; + const int reunit_svd_only = 0; const double svd_rel_error 
= 1e-6; const double svd_abs_error = 1e-6; - setUnitarizeLinksConstants(unitarize_eps, max_error, - reunit_allow_svd, reunit_svd_only, - svd_rel_error, svd_abs_error); - + setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error); } bool checkDimsPartitioned() @@ -60,8 +56,9 @@ class GaugeAlgTest : public ::testing::Test return false; } - bool comparePlaquette(double3 a, double3 b){ - double a0,a1,a2; + bool comparePlaquette(double3 a, double3 b) + { + double a0, a1, a2; a0 = std::abs(a.x - b.x); a1 = std::abs(a.y - b.y); a2 = std::abs(a.z - b.z); @@ -71,14 +68,16 @@ class GaugeAlgTest : public ::testing::Test return false; } - bool CheckDeterminant(double2 detu){ + bool CheckDeterminant(double2 detu) + { double prec_val = 5e-8; if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true; return false; } - virtual void SetUp() { + virtual void SetUp() + { setVerbosity(QUDA_VERBOSE); param = newQudaGaugeParam(); @@ -100,7 +99,7 @@ class GaugeAlgTest : public ::testing::Test param.X[3] = tdim; setDims(param.X); - param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! + param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! 
param.t_boundary = QUDA_PERIODIC_T; param.gauge_fix = QUDA_GAUGE_FIXED_NO; param.ga_pad = 0; @@ -108,25 +107,25 @@ class GaugeAlgTest : public ::testing::Test GaugeFieldParam gParam(0, param); gParam.pad = 0; gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); #ifdef MULTI_GPU int y[4]; - int R[4] = {0,0,0,0}; - for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; - for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int R[4] = {0, 0, 0, 0}; + for (int dir = 0; dir < 4; ++dir) + if (comm_dim_partitioned(dir)) R[dir] = 2; + for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; int pad = 0; - GaugeFieldParam gParamEx(y, prec, link_recon, - pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); gParamEx.create = QUDA_ZERO_FIELD_CREATE; gParamEx.order = gParam.order; gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; gParamEx.t_boundary = gParam.t_boundary; gParamEx.nFace = 1; - for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; + for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir]; U = new cudaGaugeField(gParamEx); #else U = new cudaGaugeField(gParam); @@ -156,11 +155,11 @@ class GaugeAlgTest : public ::testing::Test SetReunitarizationConsts(); plaquette(*U); - for(int step=1; step<=nsteps; ++step){ - printfQuda("Step %d\n",step); + for (int step = 1; step <= nsteps; ++step) { + printfQuda("Step %d\n", step); Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - //Reunitarize gauge links... + // Reunitarize gauge links... 
*num_failures_h = 0; unitarizeLinks(*U, num_failures_d); qudaDeviceSynchronize(); @@ -177,14 +176,15 @@ class GaugeAlgTest : public ::testing::Test host_free(num_failures_h); } - virtual void TearDown() { + virtual void TearDown() + { detu = getLinkDeterminant(*U); double2 tru = getLinkTrace(*U); printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); - printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0); + printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); delete U; - //Release all temporary memory used for data exchange between GPUs in multi-GPU mode + // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); a0.Stop(__func__, __FILE__, __LINE__); @@ -203,7 +203,8 @@ TEST_F(GaugeAlgTest, Generation) TEST_F(GaugeAlgTest, Landau_Overrelaxation) { printfQuda("Landau gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -212,7 +213,8 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -222,7 +224,8 @@ TEST_F(GaugeAlgTest, Landau_FFT) { if (!checkDimsPartitioned()) { 
printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -233,7 +236,8 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) { if (!checkDimsPartitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -246,9 +250,9 @@ int main(int argc, char **argv) ::testing::InitGoogleTest(&argc, argv); // return code for google test int test_rc = 0; - xdim=ydim=zdim=tdim=32; + xdim = ydim = zdim = tdim = 32; - // command line options + // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index 5579460ab7..ad7985c464 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -25,7 +25,7 @@ using namespace quda; void display_test_info() { printfQuda("running the following test:\n"); - + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, @@ -36,18 +36,15 @@ void display_test_info() dimPartitioned(3)); 
} - -void SetReunitarizationConsts(){ +void SetReunitarizationConsts() +{ const double unitarize_eps = 1e-14; const double max_error = 1e-10; const int reunit_allow_svd = 1; - const int reunit_svd_only = 0; + const int reunit_svd_only = 0; const double svd_rel_error = 1e-6; const double svd_abs_error = 1e-6; - setUnitarizeLinksConstants(unitarize_eps, max_error, - reunit_allow_svd, reunit_svd_only, - svd_rel_error, svd_abs_error); - + setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error); } bool checkDimsPartitioned() @@ -60,8 +57,8 @@ bool checkDimsPartitioned() bool comparePlaquette(double3 a, double3 b) { printfQuda("Plaq: %.16e, %.16e, %.16e\n", a.x, a.y, a.z); - printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); - double a0,a1,a2; + printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); + double a0, a1, a2; a0 = std::abs(a.x - b.x); a1 = std::abs(a.y - b.y); a2 = std::abs(a.z - b.z); @@ -80,7 +77,7 @@ bool checkDeterminant(double2 detu) int main(int argc, char **argv) { - // command line options + // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); @@ -117,9 +114,9 @@ int main(int argc, char **argv) int novrsteps = heatbath_num_overrelax_per_step; bool coldstart = heatbath_coldstart; double beta_value = heatbath_beta_value; - - RNG * randstates; - + + RNG *randstates; + // Setup gauge container. param.cpu_prec = prec; param.cpu_prec = prec; @@ -127,142 +124,143 @@ int main(int argc, char **argv) param.reconstruct = link_recon; param.cuda_prec_sloppy = prec; param.reconstruct_sloppy = link_recon; - + param.type = QUDA_WILSON_LINKS; param.gauge_order = QUDA_MILC_GAUGE_ORDER; - + param.X[0] = xdim; param.X[1] = ydim; param.X[2] = zdim; param.X[3] = tdim; setDims(param.X); - - param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! + + param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! 
param.t_boundary = QUDA_PERIODIC_T; param.gauge_fix = QUDA_GAUGE_FIXED_NO; param.ga_pad = 0; - + GaugeFieldParam gParam(0, param); gParam.pad = 0; gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); - + int y[4]; - int R[4] = {0,0,0,0}; - for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; - for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int R[4] = {0, 0, 0, 0}; + for (int dir = 0; dir < 4; ++dir) + if (comm_dim_partitioned(dir)) R[dir] = 2; + for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; int pad = 0; - GaugeFieldParam gParamEx(y, prec, link_recon, - pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); gParamEx.create = QUDA_ZERO_FIELD_CREATE; gParamEx.order = gParam.order; gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; gParamEx.t_boundary = gParam.t_boundary; gParamEx.nFace = 1; - for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; + for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir]; U = new cudaGaugeField(gParamEx); // CURAND random generator initialization randstates = new RNG(gParam, 1234); randstates->Init(); - + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); - + if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) InitGaugeField(*U); else InitGaugeField(*U, *randstates); - + // Reunitarization setup SetReunitarizationConsts(); plaquette(*U); - - for(int step=1; step<=nsteps; ++step){ - printfQuda("Step %d\n",step); + + for (int step = 1; step <= nsteps; ++step) { + printfQuda("Step %d\n", step); Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - - //Reunitarize 
gauge links... + + // Reunitarize gauge links... *num_failures_h = 0; unitarizeLinks(*U, num_failures_d); qudaDeviceSynchronize(); if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); - + plaquette(*U); } - + plaq = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - + host_free(num_failures_h); // Gauge Fixing Routines //--------------------------------------------------------------------------- switch (test_type) { - case 0: + case 0: printfQuda("%s gauge fixing with overrelaxation\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); - gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, + gf_reunit_interval, gf_theta_condition); comparePlaquette(plaq, plaquette(*U)); break; - + case 1: if (!checkDimsPartitioned()) { printfQuda("%s gauge fixing with steepest descent method with FFTs\n", gf_gauge_dir == 4 ? 
"Landau" : "Coulomb"); - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); comparePlaquette(plaq, plaquette(*U)); } else { errorQuda("FFT gauge fixing not supported for multi GPU geometry"); } break; - - default: - errorQuda("Unknown test type %d", test_type); + + default: errorQuda("Unknown test type %d", test_type); } double2 link_trace = getLinkTrace(*U); - printfQuda("Tr: %.16e:%.16e\n", link_trace.x/3.0, link_trace.y/3.0); + printfQuda("Tr: %.16e:%.16e\n", link_trace.x / 3.0, link_trace.y / 3.0); // Save if output string is specified - if (strcmp(gauge_outfile,"")) { - + if (strcmp(gauge_outfile, "")) { + printfQuda("Saving the gauge field to file %s\n", gauge_outfile); QudaGaugeParam gauge_param = newQudaGaugeParam(); setWilsonGaugeParam(gauge_param); - + void *cpu_gauge[4]; for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } - + cudaGaugeField *gauge; gauge = new cudaGaugeField(gParam); - + // copy into regular field - copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); - saveGaugeFieldQuda((void*)cpu_gauge, (void*)gauge, &gauge_param); - + copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); + saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); + // Write to disk - write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char**)0); - - for (int dir = 0; dir<4; dir++) free(cpu_gauge[dir]); + write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0); + + for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]); delete gauge; } else { printfQuda("No output file specified.\n"); - } - + } + delete U; - - //Release all temporary memory used for data exchange between GPUs in multi-GPU mode + + // Release 
all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); - + randstates->Release(); delete randstates; - - freeGaugeQuda(); + + freeGaugeQuda(); endQuda(); finalizeComms(); - + return 0; } diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp index a0734f1516..b11d569ad6 100644 --- a/tests/heatbath_test.cpp +++ b/tests/heatbath_test.cpp @@ -53,7 +53,7 @@ void display_test_info() } int main(int argc, char **argv) -{ +{ // command line options auto app = make_app(); add_heatbath_option_group(app); diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp index 1c0b75c3a8..6cec875339 100644 --- a/tests/su3_test.cpp +++ b/tests/su3_test.cpp @@ -116,7 +116,7 @@ int main(int argc, char **argv) if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - + initQuda(device_ordinal); setVerbosity(verbosity); @@ -126,22 +126,22 @@ int main(int argc, char **argv) QudaGaugeParam gauge_param = newQudaGaugeParam(); setGaugeParam(gauge_param); setDims(gauge_param.X); - + // All user inputs now defined display_test_info(); - - // *** QUDA parameters begin here. + + // *** QUDA parameters begin here. 
void *gauge[4], *new_gauge[4]; for (int dir = 0; dir < 4; dir++) { gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size); new_gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size); } - + constructHostGaugeField(gauge, gauge_param, argc, argv); // Load the gauge field to the device loadGaugeQuda((void *)gauge, &gauge_param); saveGaugeQuda(new_gauge, &gauge_param); - + double plaq[3]; plaqQuda(plaq); printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1], diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp index b0e312b69d..efe22d8fe0 100644 --- a/tests/utils/command_line_params.cpp +++ b/tests/utils/command_line_params.cpp @@ -1020,41 +1020,41 @@ void add_heatbath_option_group(std::shared_ptr quda_app) auto opgroup = quda_app->add_option_group("heatbath", "Options controlling heatbath tests"); opgroup->add_option("--heatbath-beta", heatbath_beta_value, "Beta value used in heatbath test (default 6.2)"); opgroup->add_option("--heatbath-coldstart", heatbath_coldstart, - "Whether to use a cold or hot start in heatbath test (default false)"); + "Whether to use a cold or hot start in heatbath test (default false)"); opgroup->add_option("--heatbath-num-hb-per-step", heatbath_num_heatbath_per_step, - "Number of heatbath hits per heatbath step (default 5)"); + "Number of heatbath hits per heatbath step (default 5)"); opgroup->add_option("--heatbath-num-or-per-step", heatbath_num_overrelax_per_step, - "Number of overrelaxation hits per heatbath step (default 5)"); + "Number of overrelaxation hits per heatbath step (default 5)"); opgroup->add_option("--heatbath-num-steps", heatbath_num_steps, - "Number of measurement steps in heatbath test (default 10)"); + "Number of measurement steps in heatbath test (default 10)"); opgroup->add_option("--heatbath-warmup-steps", heatbath_warmup_steps, - "Number of warmup steps in heatbath test (default 10)"); + "Number of 
warmup steps in heatbath test (default 10)"); } void add_gaugefix_option_group(std::shared_ptr quda_app) { // Option group for gauge fixing related options auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests"); - opgroup->add_option("--gf-dir", gf_gauge_dir, "The orthogonal direction of teh gauge fixing, 3=Coulomb, 4=Landau. (default 4)"); + opgroup->add_option("--gf-dir", gf_gauge_dir, + "The orthogonal direction of teh gauge fixing, 3=Coulomb, 4=Landau. (default 4)"); opgroup->add_option("--gf-maxiter", gf_maxiter, - "The maximun number of gauge fixing iterations to be applied (default 10000) "); + "The maximun number of gauge fixing iterations to be applied (default 10000) "); opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval, - "Print the gauge fixing progress every N steps (default 100)"); + "Print the gauge fixing progress every N steps (default 100)"); opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost, - "The overrelaxation boost parameter for the overrelaxation method (default 1.5)"); - opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, - "The Alpha parameter in the FFT method (default 0.8)"); + "The overrelaxation boost parameter for the overrelaxation method (default 1.5)"); + opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)"); opgroup->add_option("--gf-reunit-interval", gf_reunit_interval, - "Reunitarise the gauge field every N steps (default 10)"); - opgroup->add_option("--gf-tol", gf_tolerance, - "The tolerance of the gauge fixing quality (default 1e-6)"); - opgroup->add_option("--gf-theta-condition", gf_theta_condition, - "Use the theta value to determine the gauge fixing if true. 
If false, use the delta value (default false)"); - opgroup->add_option("--gf-fft-autotune", gf_fft_autotune, - "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)"); + "Reunitarise the gauge field every N steps (default 10)"); + opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)"); + opgroup->add_option( + "--gf-theta-condition", gf_theta_condition, + "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)"); + opgroup->add_option( + "--gf-fft-autotune", gf_fft_autotune, + "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)"); } - void add_comms_option_group(std::shared_ptr quda_app) { auto opgroup From b34ef65737439678d92fb58d2e6f6b6aa3363bfc Mon Sep 17 00:00:00 2001 From: cpviolator Date: Sat, 1 May 2021 17:59:56 -0700 Subject: [PATCH 05/32] Allow for single case testing in gauge_alg_ctest, minor clean up of gauge fixing stdout and comments --- include/gauge_tools.h | 4 +- include/quda.h | 4 +- include/quda_milc_interface.h | 4 +- lib/gauge_fix_fft.cu | 17 +- lib/gauge_fix_ovr.cu | 9 +- tests/CMakeLists.txt | 8 +- tests/gauge_alg_ctest.cpp | 443 ++++++++++++++++++++++------------ tests/gauge_alg_test.cpp | 266 -------------------- 8 files changed, 313 insertions(+), 442 deletions(-) delete mode 100644 tests/gauge_alg_test.cpp diff --git a/include/gauge_tools.h b/include/gauge_tools.h index 9c1654f483..7b9e39b5b9 100644 --- a/include/gauge_tools.h +++ b/include/gauge_tools.h @@ -120,7 +120,7 @@ namespace quda * value is zero then the method stops when iteration reachs the * maximum number of steps defined by Nsteps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value + * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the 
theta value */ void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta); @@ -136,7 +136,7 @@ namespace quda * @param[in] tolerance, torelance value to stop the method, if this * value is zero then the method stops when iteration reachs the * maximum number of steps defined by Nsteps - * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value + * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value */ void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha, const int autotune, const double tolerance, const int stopWtheta); diff --git a/include/quda.h b/include/quda.h index bbf55f037c..3151ca667e 100644 --- a/include/quda.h +++ b/include/quda.h @@ -1502,7 +1502,7 @@ extern "C" { * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
* @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value + * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in] param The parameters of the external fields and the computation settings * @param[out] timeinfo */ @@ -1520,7 +1520,7 @@ extern "C" { * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when * iteration reachs the maximum number of steps defined by Nsteps - * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value + * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in] param The parameters of the external fields and the computation settings * @param[out] timeinfo */ diff --git a/include/quda_milc_interface.h b/include/quda_milc_interface.h index fc9e54b151..cd5a95794c 100644 --- a/include/quda_milc_interface.h +++ b/include/quda_milc_interface.h @@ -963,7 +963,7 @@ extern "C" { * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
* @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value + * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in,out] milc_sitelink, MILC gauge field to be fixed */ void qudaGaugeFixingOVR( const int precision, @@ -987,7 +987,7 @@ extern "C" { * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps - * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value + * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in,out] milc_sitelink, MILC gauge field to be fixed */ void qudaGaugeFixingFFT( int precision, diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu index 2466000c46..a2d4f9c633 100644 --- a/lib/gauge_fix_fft.cu +++ b/lib/gauge_fix_fft.cu @@ -635,16 +635,13 @@ namespace quda { profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); Float alpha = alpha0; - std::cout << "\tAlpha parameter of the Steepest Descent Method: " << alpha << std::endl; - if ( autotune ) std::cout << "\tAuto tune active: yes" << std::endl; - else std::cout << "\tAuto tune active: no" << std::endl; - std::cout << "\tStop criterium: " << tolerance << std::endl; - if ( stopWtheta ) std::cout << "\tStop criterium method: theta" << std::endl; - else std::cout << "\tStop criterium method: Delta" << std::endl; - std::cout << "\tMaximum number of iterations: " << Nsteps << std::endl; - std::cout << "\tPrint 
convergence results at every " << verbose_interval << " steps" << std::endl; - - + printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha); + printfQuda("\tAuto tune active: %s\n", autotune ? "yes" : "no"); + printfQuda("\tStop criterion: %e\n", tolerance); + printfQuda("\tStop criterion method: %s\n", stopWtheta ? "theta" : "delta"); + printfQuda("\tMaximum number of iterations: %d\n", Nsteps); + printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval); + unsigned int delta_pad = data.X()[0] * data.X()[1] * data.X()[2] * data.X()[3]; int4 size = make_int4( data.X()[0], data.X()[1], data.X()[2], data.X()[3] ); cufftHandle plan_xy; diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu index 93793bbf61..1f3d6485f3 100644 --- a/lib/gauge_fix_ovr.cu +++ b/lib/gauge_fix_ovr.cu @@ -1071,13 +1071,12 @@ public: double byte = 0; printfQuda("\tOverrelaxation boost parameter: %lf\n", (double)relax_boost); - printfQuda("\tStop criterium: %lf\n", tolerance); - if ( stopWtheta ) printfQuda("\tStop criterium method: theta\n"); - else printfQuda("\tStop criterium method: Delta\n"); + printfQuda("\tStop criterion: %lf\n", tolerance); + printfQuda("\tStop criterion method: %s\n", stopWtheta ? "theta" : "delta"); printfQuda("\tMaximum number of iterations: %d\n", Nsteps); printfQuda("\tReunitarize at every %d steps\n", reunit_interval); printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval); - + const double unitarize_eps = 1e-14; const double max_error = 1e-10; const int reunit_allow_svd = 1; @@ -1398,7 +1397,7 @@ public: * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
* @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterium and 1 to use the theta value + * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value */ void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ed9e9df62f..560785ae2d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -236,14 +236,10 @@ if(QUDA_FORCE_GAUGE) endif() if(QUDA_GAUGE_ALG) - add_executable(gauge_alg_test gauge_alg_test.cpp) - target_link_libraries(gauge_alg_test ${TEST_LIBS}) - quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS) - add_executable(gauge_alg_ctest gauge_alg_ctest.cpp) target_link_libraries(gauge_alg_ctest ${TEST_LIBS}) quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS) - install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) + install(TARGETS gauge_alg_ctest ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) add_executable(heatbath_test heatbath_test.cpp) target_link_libraries(heatbath_test ${TEST_LIBS}) @@ -809,7 +805,7 @@ foreach(prec IN LISTS TEST_PRECS) if(QUDA_GAUGE_ALG) add_test(NAME gauge_alg_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} --dim 2 4 6 8 --prec ${prec} --gtest_output=xml:gauge_arg_test_${prec}.xml) endif() diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index cdaa2efd9a..881d53bb76 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include 
#include @@ -21,6 +22,42 @@ using namespace quda; +//***********************************************************// +// This boolean controls whether or not the full Google test // +// is done. If the user passes a value of 1 or 2 to --test // +// then a single instance of OVR or FFT gauge fixing is done // +// and the value of this bool is set to false. Otherwise the // +// Google tests are performed. // +//***********************************************************// +bool execute = true; + +void display_test_info() +{ + printfQuda("running the following test:\n"); + + switch (test_type) { + case 0: + printfQuda("\n Google testing\n"); + break; + case 1: + printfQuda("\nOVR gauge fix\n"); + break; + case 2: + printfQuda("\nFFT gauge fix\n"); + break; + default: errorQuda("Undefined test type %d given", test_type); + } + + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); + printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), + get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, + tdim, Lsdim); + + printfQuda("Grid partition info: X Y Z T\n"); + printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), + dimPartitioned(3)); +} + class GaugeAlgTest : public ::testing::Test { protected: @@ -35,7 +72,9 @@ class GaugeAlgTest : public ::testing::Test int novrsteps; bool coldstart; double beta_value; - + + bool unit_test; + RNG *randstates; void SetReunitarizationConsts() @@ -64,45 +103,195 @@ class GaugeAlgTest : public ::testing::Test a2 = std::abs(a.z - b.z); double prec_val = 1.0e-5; if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true; - return false; + return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } bool CheckDeterminant(double2 detu) { double prec_val = 5e-8; if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - if (std::abs(1.0 - 
detu.x) < prec_val && std::abs(detu.y) < prec_val) return true; - return false; + return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val); } virtual void SetUp() { - setVerbosity(QUDA_VERBOSE); - - param = newQudaGaugeParam(); - - // Setup gauge container. - param.cpu_prec = prec; - param.cpu_prec = prec; - param.cuda_prec = prec; - param.reconstruct = link_recon; - param.cuda_prec_sloppy = prec; - param.reconstruct_sloppy = link_recon; - - param.type = QUDA_WILSON_LINKS; - param.gauge_order = QUDA_MILC_GAUGE_ORDER; - - param.X[0] = xdim; - param.X[1] = ydim; - param.X[2] = zdim; - param.X[3] = tdim; - setDims(param.X); + if(execute) { + setVerbosity(QUDA_VERBOSE); + param = newQudaGaugeParam(); + + // Setup gauge container. + param.cpu_prec = prec; + param.cpu_prec = prec; + param.cuda_prec = prec; + param.reconstruct = link_recon; + param.cuda_prec_sloppy = prec; + param.reconstruct_sloppy = link_recon; + + param.type = QUDA_WILSON_LINKS; + param.gauge_order = QUDA_MILC_GAUGE_ORDER; + + param.X[0] = xdim; + param.X[1] = ydim; + param.X[2] = zdim; + param.X[3] = tdim; + setDims(param.X); + + param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! 
+ param.t_boundary = QUDA_PERIODIC_T; + param.gauge_fix = QUDA_GAUGE_FIXED_NO; + param.ga_pad = 0; + + GaugeFieldParam gParam(0, param); + gParam.pad = 0; + gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; + gParam.reconstruct = param.reconstruct; + gParam.setPrecision(gParam.Precision(), true); + +#ifdef MULTI_GPU + int y[4]; + int R[4] = {0, 0, 0, 0}; + for (int dir = 0; dir < 4; ++dir) + if (comm_dim_partitioned(dir)) R[dir] = 2; + for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int pad = 0; + GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + gParamEx.create = QUDA_ZERO_FIELD_CREATE; + gParamEx.order = gParam.order; + gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; + gParamEx.t_boundary = gParam.t_boundary; + gParamEx.nFace = 1; + for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir]; + U = new cudaGaugeField(gParamEx); +#else + U = new cudaGaugeField(gParam); +#endif + // CURAND random generator initialization + randstates = new RNG(gParam, 1234); + randstates->Init(); + + nsteps = heatbath_num_steps; + nhbsteps = heatbath_num_heatbath_per_step; + novrsteps = heatbath_num_overrelax_per_step; + coldstart = heatbath_coldstart; + beta_value = heatbath_beta_value; + + a0.Start(__func__, __FILE__, __LINE__); + a1.Start(__func__, __FILE__, __LINE__); + + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); + int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); + + if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) + InitGaugeField(*U); + else + InitGaugeField(*U, *randstates); + + // Reunitarization setup + SetReunitarizationConsts(); + plaquette(*U); + + for (int step = 1; step <= nsteps; ++step) { + printfQuda("Step %d\n", step); + Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); + + // Reunitarize gauge links... 
+ *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + + plaquette(*U); + } + a1.Stop(__func__, __FILE__, __LINE__); + + printfQuda("Time Monte -> %.6f s\n", a1.Last()); + plaq = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + + host_free(num_failures_h); + + // If a specific test type is requested, perform it now and then + // turn off all Google tests in the tear down. + switch (test_type) { + case 0: + // Do the Google testing + break; + case 1: + run_ovr(); + break; + case 2: + run_fft(); + break; + default: + errorQuda("Invalid test type %d ", test_type); + } + } + } + + virtual void TearDown() + { + if(execute) { + detu = getLinkDeterminant(*U); + double2 tru = getLinkTrace(*U); + printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); + printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); + + delete U; + // Release all temporary memory used for data exchange between GPUs in multi-GPU mode + PGaugeExchangeFree(); + + a0.Stop(__func__, __FILE__, __LINE__); + printfQuda("Time -> %.6f s\n", a0.Last()); + randstates->Release(); + delete randstates; + } + // If we performed a specific instance, switch off the + // Google testing.
+ if(test_type != 0) execute = false; + } + + virtual void run_ovr() + { + if(execute) { + gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + // Save if output string is specified + if (strcmp(gauge_outfile, "")) save_gauge(); + } + } + virtual void run_fft() + { + if(execute) { + if (!checkDimsPartitioned()) { + printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); + + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + // Save if output string is specified + if (strcmp(gauge_outfile, "")) save_gauge(); + } else { + errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); + } + } + } - param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! 
- param.t_boundary = QUDA_PERIODIC_T; - param.gauge_fix = QUDA_GAUGE_FIXED_NO; - param.ga_pad = 0; + virtual void save_gauge() { + + printfQuda("Saving the gauge field to file %s\n", gauge_outfile); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); + setWilsonGaugeParam(gauge_param); + + void *cpu_gauge[4]; + for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } GaugeFieldParam gParam(0, param); gParam.pad = 0; @@ -111,168 +300,124 @@ class GaugeAlgTest : public ::testing::Test gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); - -#ifdef MULTI_GPU - int y[4]; - int R[4] = {0, 0, 0, 0}; - for (int dir = 0; dir < 4; ++dir) - if (comm_dim_partitioned(dir)) R[dir] = 2; - for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; - int pad = 0; - GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); - gParamEx.create = QUDA_ZERO_FIELD_CREATE; - gParamEx.order = gParam.order; - gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; - gParamEx.t_boundary = gParam.t_boundary; - gParamEx.nFace = 1; - for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir]; - U = new cudaGaugeField(gParamEx); -#else - U = new cudaGaugeField(gParam); -#endif - // CURAND random generator initialization - randstates = new RNG(gParam, 1234); - randstates->Init(); - - nsteps = heatbath_num_steps; - nhbsteps = heatbath_num_heatbath_per_step; - novrsteps = heatbath_num_overrelax_per_step; - coldstart = heatbath_coldstart; - beta_value = heatbath_beta_value; - - a0.Start(__func__, __FILE__, __LINE__); - a1.Start(__func__, __FILE__, __LINE__); - - int *num_failures_h = (int *)mapped_malloc(sizeof(int)); - int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); - - if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) - InitGaugeField(*U); - else - InitGaugeField(*U, *randstates); - - // Reunitarization setup - 
SetReunitarizationConsts(); - plaquette(*U); - - for (int step = 1; step <= nsteps; ++step) { - printfQuda("Step %d\n", step); - Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - - // Reunitarize gauge links... - *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); - - plaquette(*U); - } - a1.Stop(__func__, __FILE__, __LINE__); - - printfQuda("Time Monte -> %.6f s\n", a1.Last()); - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - - host_free(num_failures_h); - } - - virtual void TearDown() - { - detu = getLinkDeterminant(*U); - double2 tru = getLinkTrace(*U); - printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); - printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); - - delete U; - // Release all temporary memory used for data exchange between GPUs in multi-GPU mode - PGaugeExchangeFree(); - - a0.Stop(__func__, __FILE__, __LINE__); - printfQuda("Time -> %.6f s\n", a0.Last()); - randstates->Release(); - delete randstates; + + cudaGaugeField *gauge; + gauge = new cudaGaugeField(gParam); + + // copy into regular field + copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); + saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); + + // Write to disk + write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0); + + for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]); + delete gauge; } }; - + TEST_F(GaugeAlgTest, Generation) { - detu = getLinkDeterminant(*U); - ASSERT_TRUE(CheckDeterminant(detu)); + if(execute) { + detu = getLinkDeterminant(*U); + ASSERT_TRUE(CheckDeterminant(detu)); + } } TEST_F(GaugeAlgTest, Landau_Overrelaxation) { - printfQuda("Landau gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); - auto plaq_gf = 
plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if(execute) { + printfQuda("Landau gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } } TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { - printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if(execute) { + printfQuda("Coulomb gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } } TEST_F(GaugeAlgTest, Landau_FFT) { - if (!checkDimsPartitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if(execute) { + if (!checkDimsPartitioned()) { + printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); + auto 
plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } } } TEST_F(GaugeAlgTest, Coulomb_FFT) { - if (!checkDimsPartitioned()) { - printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if(execute) { + if (!checkDimsPartitioned()) { + printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } } } int main(int argc, char **argv) { - // initalize google test, includes command line options - ::testing::InitGoogleTest(&argc, argv); - // return code for google test - int test_rc = 0; - xdim = ydim = zdim = tdim = 32; - // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); + + test_type = 0; + CLI::TransformPairs test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}}; + app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { return app->exit(e); } + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); + // call srand() with a rank-dependent seed + initRand(); + + display_test_info(); + + // 
initialize the QUDA library + initQuda(device_ordinal); + + // initialize google test, includes command line options + ::testing::InitGoogleTest(&argc, argv); + // Ensure gtest prints only from rank 0 ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } - initQuda(device_ordinal); - test_rc = RUN_ALL_TESTS(); - endQuda(); - + // return code for google test + int test_rc = RUN_ALL_TESTS(); + + endQuda(); finalizeComms(); - + return test_rc; } diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp deleted file mode 100644 index ad7985c464..0000000000 --- a/tests/gauge_alg_test.cpp +++ /dev/null @@ -1,266 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include - -using namespace quda; - -void display_test_info() -{ - printfQuda("running the following test:\n"); - - printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); - printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), - get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, - tdim, Lsdim); - - printfQuda("Grid partition info: X Y Z T\n"); - printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), - dimPartitioned(3)); -} - -void SetReunitarizationConsts() -{ - const double unitarize_eps = 1e-14; - const double max_error = 1e-10; - const int reunit_allow_svd = 1; - const int reunit_svd_only = 0; - const double svd_rel_error = 1e-6; - const double svd_abs_error = 1e-6; - setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error); -} - -bool checkDimsPartitioned() -{ - if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) ||
comm_dim_partitioned(3)) - return true; - return false; -} - -bool comparePlaquette(double3 a, double3 b) -{ - printfQuda("Plaq: %.16e, %.16e, %.16e\n", a.x, a.y, a.z); - printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); - double a0, a1, a2; - a0 = std::abs(a.x - b.x); - a1 = std::abs(a.y - b.y); - a2 = std::abs(a.z - b.z); - double prec_val = 1.0e-5; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); -} - -bool checkDeterminant(double2 detu) -{ - printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y); - double prec_val = 5e-8; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val; -} - -int main(int argc, char **argv) -{ - // command line options - auto app = make_app(); - add_gaugefix_option_group(app); - add_heatbath_option_group(app); - CLI::TransformPairs test_type_map {{"OVR", 0}, {"FFT", 1}}; - app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); - try { - app->parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app->exit(e); - } - - if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; - if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - - // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) - initComms(argc, argv, gridsize_from_cmdline); - - // call srand() with a rank-dependent seed - initRand(); - - display_test_info(); - - // initialize the QUDA library - initQuda(device_ordinal); - - // *** QUDA parameters begin here. 
- setVerbosity(QUDA_VERBOSE); - QudaGaugeParam param = newQudaGaugeParam(); - - double3 plaq; - cudaGaugeField *U; - int nsteps = heatbath_num_steps; - int nhbsteps = heatbath_num_heatbath_per_step; - int novrsteps = heatbath_num_overrelax_per_step; - bool coldstart = heatbath_coldstart; - double beta_value = heatbath_beta_value; - - RNG *randstates; - - // Setup gauge container. - param.cpu_prec = prec; - param.cpu_prec = prec; - param.cuda_prec = prec; - param.reconstruct = link_recon; - param.cuda_prec_sloppy = prec; - param.reconstruct_sloppy = link_recon; - - param.type = QUDA_WILSON_LINKS; - param.gauge_order = QUDA_MILC_GAUGE_ORDER; - - param.X[0] = xdim; - param.X[1] = ydim; - param.X[2] = zdim; - param.X[3] = tdim; - setDims(param.X); - - param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! - param.t_boundary = QUDA_PERIODIC_T; - param.gauge_fix = QUDA_GAUGE_FIXED_NO; - param.ga_pad = 0; - - GaugeFieldParam gParam(0, param); - gParam.pad = 0; - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; - gParam.reconstruct = param.reconstruct; - gParam.setPrecision(gParam.Precision(), true); - - int y[4]; - int R[4] = {0, 0, 0, 0}; - for (int dir = 0; dir < 4; ++dir) - if (comm_dim_partitioned(dir)) R[dir] = 2; - for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; - int pad = 0; - GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); - gParamEx.create = QUDA_ZERO_FIELD_CREATE; - gParamEx.order = gParam.order; - gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; - gParamEx.t_boundary = gParam.t_boundary; - gParamEx.nFace = 1; - for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir]; - U = new cudaGaugeField(gParamEx); - - // CURAND random generator initialization - randstates = new RNG(gParam, 1234); - randstates->Init(); - - int *num_failures_h = (int *)mapped_malloc(sizeof(int)); - int *num_failures_d = (int 
*)get_mapped_device_pointer(num_failures_h); - - if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) - InitGaugeField(*U); - else - InitGaugeField(*U, *randstates); - - // Reunitarization setup - SetReunitarizationConsts(); - plaquette(*U); - - for (int step = 1; step <= nsteps; ++step) { - printfQuda("Step %d\n", step); - Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - - // Reunitarize gauge links... - *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); - - plaquette(*U); - } - - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - - host_free(num_failures_h); - - // Gauge Fixing Routines - //--------------------------------------------------------------------------- - switch (test_type) { - case 0: - printfQuda("%s gauge fixing with overrelaxation\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); - gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, - gf_reunit_interval, gf_theta_condition); - comparePlaquette(plaq, plaquette(*U)); - break; - - case 1: - if (!checkDimsPartitioned()) { - printfQuda("%s gauge fixing with steepest descent method with FFTs\n", gf_gauge_dir == 4 ? 
"Landau" : "Coulomb"); - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - comparePlaquette(plaq, plaquette(*U)); - } else { - errorQuda("FFT gauge fixing not supported for multi GPU geometry"); - } - break; - - default: errorQuda("Unknown test type %d", test_type); - } - - double2 link_trace = getLinkTrace(*U); - printfQuda("Tr: %.16e:%.16e\n", link_trace.x / 3.0, link_trace.y / 3.0); - - // Save if output string is specified - if (strcmp(gauge_outfile, "")) { - - printfQuda("Saving the gauge field to file %s\n", gauge_outfile); - - QudaGaugeParam gauge_param = newQudaGaugeParam(); - setWilsonGaugeParam(gauge_param); - - void *cpu_gauge[4]; - for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } - - cudaGaugeField *gauge; - gauge = new cudaGaugeField(gParam); - - // copy into regular field - copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); - saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); - - // Write to disk - write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0); - - for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]); - delete gauge; - } else { - printfQuda("No output file specified.\n"); - } - - delete U; - - // Release all temporary memory used for data exchange between GPUs in multi-GPU mode - PGaugeExchangeFree(); - - randstates->Release(); - delete randstates; - - freeGaugeQuda(); - endQuda(); - finalizeComms(); - - return 0; -} From d38486b2ea62b248778fec15fab5b1f02cf0cde7 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Sat, 1 May 2021 18:01:05 -0700 Subject: [PATCH 06/32] Clang tidy --- include/quda.h | 3 +- include/quda_milc_interface.h | 8 +- tests/gauge_alg_ctest.cpp | 180 ++++++++++++++++------------------ 3 files changed, 91 insertions(+), 100 deletions(-) diff --git a/include/quda.h b/include/quda.h index 3151ca667e..1157fe95f4 100644 --- 
a/include/quda.h +++ b/include/quda.h @@ -1500,7 +1500,8 @@ extern "C" { * @param[in] Nsteps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps + * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when + * iteration reachs the maximum number of steps defined by Nsteps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in] param The parameters of the external fields and the computation settings diff --git a/include/quda_milc_interface.h b/include/quda_milc_interface.h index cd5a95794c..d11364d51c 100644 --- a/include/quda_milc_interface.h +++ b/include/quda_milc_interface.h @@ -953,7 +953,6 @@ extern "C" { */ void qudaDestroyGaugeField(void* gauge); - /** * @brief Gauge fixing with overrelaxation with support for single and multi GPU. * @param[in] precision, 1 for single precision else for double precision @@ -961,7 +960,8 @@ extern "C" { * @param[in] Nsteps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
- * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps + * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when + * iteration reachs the maximum number of steps defined by Nsteps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in,out] milc_sitelink, MILC gauge field to be fixed @@ -977,7 +977,6 @@ extern "C" { void* milc_sitelink ); - /** * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. * @param[in] precision, 1 for single precision else for double precision @@ -986,7 +985,8 @@ extern "C" { * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps + * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when + * iteration reachs the maximum number of steps defined by Nsteps * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in,out] milc_sitelink, MILC gauge field to be fixed */ diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index 881d53bb76..f797420c9c 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -36,18 +36,12 @@ void display_test_info() printfQuda("running the following test:\n"); switch (test_type) { - case 0: - printfQuda("\n Google testing\n"); - break; - case 1: - printfQuda("\nOVR 
gauge fix\n"); - break; - case 2: - printfQuda("\nFFT gauge fix\n"); - break; + case 0: printfQuda("\n Google testing\n"); break; + case 1: printfQuda("\nOVR gauge fix\n"); break; + case 2: printfQuda("\nFFT gauge fix\n"); break; default: errorQuda("Undefined test type %d given", test_type); } - + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, @@ -72,9 +66,9 @@ class GaugeAlgTest : public ::testing::Test int novrsteps; bool coldstart; double beta_value; - + bool unit_test; - + RNG *randstates; void SetReunitarizationConsts() @@ -115,10 +109,10 @@ class GaugeAlgTest : public ::testing::Test virtual void SetUp() { - if(execute) { + if (execute) { setVerbosity(QUDA_VERBOSE); param = newQudaGaugeParam(); - + // Setup gauge container. param.cpu_prec = prec; param.cpu_prec = prec; @@ -126,21 +120,21 @@ class GaugeAlgTest : public ::testing::Test param.reconstruct = link_recon; param.cuda_prec_sloppy = prec; param.reconstruct_sloppy = link_recon; - + param.type = QUDA_WILSON_LINKS; param.gauge_order = QUDA_MILC_GAUGE_ORDER; - + param.X[0] = xdim; param.X[1] = ydim; param.X[2] = zdim; param.X[3] = tdim; setDims(param.X); - + param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! 
param.t_boundary = QUDA_PERIODIC_T; param.gauge_fix = QUDA_GAUGE_FIXED_NO; param.ga_pad = 0; - + GaugeFieldParam gParam(0, param); gParam.pad = 0; gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; @@ -148,12 +142,12 @@ class GaugeAlgTest : public ::testing::Test gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); - + #ifdef MULTI_GPU int y[4]; int R[4] = {0, 0, 0, 0}; for (int dir = 0; dir < 4; ++dir) - if (comm_dim_partitioned(dir)) R[dir] = 2; + if (comm_dim_partitioned(dir)) R[dir] = 2; for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; int pad = 0; GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); @@ -170,93 +164,88 @@ class GaugeAlgTest : public ::testing::Test // CURAND random generator initialization randstates = new RNG(gParam, 1234); randstates->Init(); - + nsteps = heatbath_num_steps; nhbsteps = heatbath_num_heatbath_per_step; novrsteps = heatbath_num_overrelax_per_step; coldstart = heatbath_coldstart; beta_value = heatbath_beta_value; - + a0.Start(__func__, __FILE__, __LINE__); a1.Start(__func__, __FILE__, __LINE__); - + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); - + if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) - InitGaugeField(*U); + InitGaugeField(*U); else - InitGaugeField(*U, *randstates); - + InitGaugeField(*U, *randstates); + // Reunitarization setup SetReunitarizationConsts(); plaquette(*U); - + for (int step = 1; step <= nsteps; ++step) { - printfQuda("Step %d\n", step); - Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - - // Reunitarize gauge links... 
- *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); - - plaquette(*U); + printfQuda("Step %d\n", step); + Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); + + // Reunitarize gauge links... + *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + + plaquette(*U); } a1.Stop(__func__, __FILE__, __LINE__); - + printfQuda("Time Monte -> %.6f s\n", a1.Last()); plaq = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - + host_free(num_failures_h); // If a specific test type is requested, perfrom it now and then // turn off all Google tests in the tear down. switch (test_type) { case 0: - // Do the Google testing - break; - case 1: - run_ovr(); - break; - case 2: - run_fft(); - break; - default: - errorQuda("Invalid test type %d ", test_type); + // Do the Google testing + break; + case 1: run_ovr(); break; + case 2: run_fft(); break; + default: errorQuda("Invalid test type %d ", test_type); } } } - + virtual void TearDown() { - if(execute) { + if (execute) { detu = getLinkDeterminant(*U); double2 tru = getLinkTrace(*U); printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); - + delete U; // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); - + a0.Stop(__func__, __FILE__, __LINE__); printfQuda("Time -> %.6f s\n", a0.Last()); randstates->Release(); - delete randstates; + delete randstates; } // If we performed a specific instance, switch off the // Google testing. 
- if(test_type != 0) execute = false; + if (test_type != 0) execute = false; } - + virtual void run_ovr() { - if(execute) { - gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); + if (execute) { + gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, + gf_reunit_interval, gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -266,30 +255,31 @@ class GaugeAlgTest : public ::testing::Test } virtual void run_fft() { - if(execute) { + if (execute) { if (!checkDimsPartitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - // Save if output string is specified - if (strcmp(gauge_outfile, "")) save_gauge(); + printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); + + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + // Save if output string is specified + if (strcmp(gauge_outfile, "")) save_gauge(); } else { - errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); + errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); } } } - virtual void save_gauge() { - + virtual void save_gauge() + { + printfQuda("Saving the gauge field to file %s\n", gauge_outfile); - + QudaGaugeParam gauge_param = 
newQudaGaugeParam(); setWilsonGaugeParam(gauge_param); - + void *cpu_gauge[4]; for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } @@ -300,25 +290,25 @@ class GaugeAlgTest : public ::testing::Test gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); - + cudaGaugeField *gauge; gauge = new cudaGaugeField(gParam); - + // copy into regular field copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); - + // Write to disk write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0); - + for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]); delete gauge; } }; - + TEST_F(GaugeAlgTest, Generation) { - if(execute) { + if (execute) { detu = getLinkDeterminant(*U); ASSERT_TRUE(CheckDeterminant(detu)); } @@ -326,10 +316,10 @@ TEST_F(GaugeAlgTest, Generation) TEST_F(GaugeAlgTest, Landau_Overrelaxation) { - if(execute) { + if (execute) { printfQuda("Landau gauge fixing with overrelaxation\n"); gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -338,10 +328,10 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { - if(execute) { + if (execute) { printfQuda("Coulomb gauge fixing with overrelaxation\n"); gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -350,11 +340,11 @@ TEST_F(GaugeAlgTest, 
Coulomb_Overrelaxation) TEST_F(GaugeAlgTest, Landau_FFT) { - if(execute) { + if (execute) { if (!checkDimsPartitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -364,11 +354,11 @@ TEST_F(GaugeAlgTest, Landau_FFT) TEST_F(GaugeAlgTest, Coulomb_FFT) { - if(execute) { + if (execute) { if (!checkDimsPartitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -382,7 +372,7 @@ int main(int argc, char **argv) auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); - + test_type = 0; CLI::TransformPairs test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}}; app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); @@ -395,12 +385,12 @@ int main(int argc, char **argv) if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); // call srand() with a rank-dependent seed initRand(); - + display_test_info(); // initialize the QUDA library @@ -415,9 +405,9 @@ int main(int argc, char **argv) // return code for google test int test_rc = RUN_ALL_TESTS(); - - endQuda(); + + 
endQuda(); finalizeComms(); - + return test_rc; } From 426076bbd74af7d7a8e7004de7a9dcfeb3fdca78 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 30 Apr 2021 17:34:21 -0700 Subject: [PATCH 07/32] Add gf test interface to allow fine grained control over the GF testing --- tests/gauge_alg_test.cpp | 3 ++- tests/heatbath_test.cpp | 3 ++- tests/utils/command_line_params.cpp | 24 ++++++------------------ 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index b1d09ed7de..46a0f92dd7 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -380,7 +380,7 @@ int main(int argc, char **argv) // initalize google test, includes command line options ::testing::InitGoogleTest(&argc, argv); - // command line options + // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); @@ -388,6 +388,7 @@ int main(int argc, char **argv) test_type = 0; CLI::TransformPairs test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}}; app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); + try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp index ff18f5d13f..37588df0ba 100644 --- a/tests/heatbath_test.cpp +++ b/tests/heatbath_test.cpp @@ -53,9 +53,10 @@ void display_test_info() } int main(int argc, char **argv) -{ +{ // command line options auto app = make_app(); + add_heatbath_option_group(app); try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp index 581943fbaa..7b9b0abab6 100644 --- a/tests/utils/command_line_params.cpp +++ b/tests/utils/command_line_params.cpp @@ -223,6 +223,12 @@ quda::mgarray mg_eig_save_prec = {}; bool mg_eig_coarse_guess = false; bool mg_eig_preserve_deflation = false; +int eofa_pm = 1; +double eofa_shift = -1.2345; 
+double eofa_mq1 = 1.0; +double eofa_mq2 = 0.085; +double eofa_mq3 = 1.0; + double heatbath_beta_value = 6.2; int heatbath_warmup_steps = 10; int heatbath_num_steps = 10; @@ -230,12 +236,6 @@ int heatbath_num_heatbath_per_step = 5; int heatbath_num_overrelax_per_step = 5; bool heatbath_coldstart = false; -int eofa_pm = 1; -double eofa_shift = -1.2345; -double eofa_mq1 = 1.0; -double eofa_mq2 = 0.085; -double eofa_mq3 = 1.0; - double stout_smear_rho = 0.1; double stout_smear_epsilon = -0.25; double ape_smear_rho = 0.6; @@ -505,18 +505,6 @@ std::shared_ptr make_app(std::string app_description, std::string app_n quda_app->add_option("--gaussian-sigma", gaussian_sigma, "Width of the Gaussian noise used for random gauge field contruction (default 0.2)"); - quda_app->add_option("--heatbath-beta", heatbath_beta_value, "Beta value used in heatbath test (default 6.2)"); - quda_app->add_option("--heatbath-coldstart", heatbath_coldstart, - "Whether to use a cold or hot start in heatbath test (default false)"); - quda_app->add_option("--heatbath-num-hb-per-step", heatbath_num_heatbath_per_step, - "Number of heatbath hits per heatbath step (default 5)"); - quda_app->add_option("--heatbath-num-or-per-step", heatbath_num_overrelax_per_step, - "Number of overrelaxation hits per heatbath step (default 5)"); - quda_app->add_option("--heatbath-num-steps", heatbath_num_steps, - "Number of measurement steps in heatbath test (default 10)"); - quda_app->add_option("--heatbath-warmup-steps", heatbath_warmup_steps, - "Number of warmup steps in heatbath test (default 10)"); - quda_app->add_option("--inv-type", inv_type, "The type of solver to use (default cg)") ->transform(CLI::QUDACheckedTransformer(inverter_type_map)); quda_app->add_option("--inv-deflate", inv_deflate, "Deflate the inverter using the eigensolver"); From fd30f59fc16efaf62ddfa531740086208078af69 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 30 Apr 2021 18:48:50 -0700 Subject: [PATCH 08/32] Move the gauge al test to a 
ctest, make a new interface to the gauge fixing that allows for fine grained control and gauge IO --- lib/interface_quda.cpp | 89 ++++++------- tests/CMakeLists.txt | 4 + tests/gauge_alg_ctest.cpp | 274 ++++++++++++++++++++++++++++++++++++++ tests/gauge_alg_test.cpp | 57 +++----- tests/su3_test.cpp | 21 +-- 5 files changed, 344 insertions(+), 101 deletions(-) create mode 100644 tests/gauge_alg_ctest.cpp diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index bb2b578713..d873a89e72 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -234,8 +234,8 @@ static TimeProfile profileMomAction("momActionQuda"); static TimeProfile profileEnd("endQuda"); //!< Profiler for GaugeFixing -static TimeProfile GaugeFixFFTQuda("GaugeFixFFTQuda"); -static TimeProfile GaugeFixOVRQuda("GaugeFixOVRQuda"); +static TimeProfile profileGaugeFixFFT("gaugeFixFFTQuda"); +static TimeProfile profileGaugeFixOVR("gaugeFixOVRQuda"); //!< Profiler for toal time spend between init and end static TimeProfile profileInit2End("initQuda-endQuda",false); @@ -1537,6 +1537,8 @@ void endQuda(void) profileProject.Print(); profilePhase.Print(); profileMomAction.Print(); + profileGaugeFixOVR.Print(); + profileGaugeFixFFT.Print(); profileEnd.Print(); profileInit2End.Print(); @@ -5538,12 +5540,12 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo) { - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_TOTAL); - + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL); + checkGaugeParam(param); - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_INIT); - GaugeFieldParam gParam(*param, gauge); + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT); + GaugeFieldParam gParam(gauge, *param); auto *cpuGauge = new cpuGaugeField(gParam); // gParam.pad = getFatLinkPadding(param->X); @@ -5553,44 +5555,37 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u 
gParam.setPrecision(gParam.Precision(), true); auto *cudaInGauge = new cudaGaugeField(gParam); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_INIT); - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_H2D); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_INIT); + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_H2D); - ///if (!param->use_resident_gauge) { // load fields onto the device cudaInGauge->loadCPUField(*cpuGauge); - /* } else { // or use resident fields already present - if (!gaugePrecise) errorQuda("No resident gauge field allocated"); - cudaInGauge = gaugePrecise; - gaugePrecise = nullptr; - } */ - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_H2D); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_H2D); if (comm_size() == 1) { // perform the update - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE); + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE); } else { - cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, GaugeFixOVRQuda); + cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFixOVR); - // perform the update - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_COMPUTE); + // Perform the update + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE); - //HOW TO COPY BACK TO CPU: cudaInGaugeEx->cpuGauge copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); } - - // copy the gauge field back to the host - GaugeFixOVRQuda.TPSTART(QUDA_PROFILE_D2H); + + // Copy the gauge field back to the host + profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_D2H); + 
profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_D2H); - GaugeFixOVRQuda.TPSTOP(QUDA_PROFILE_TOTAL); + profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_TOTAL); if (param->make_resident_gauge) { if (gaugePrecise != nullptr) delete gaugePrecise; @@ -5600,9 +5595,9 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u } if(timeinfo){ - timeinfo[0] = GaugeFixOVRQuda.Last(QUDA_PROFILE_H2D); - timeinfo[1] = GaugeFixOVRQuda.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = GaugeFixOVRQuda.Last(QUDA_PROFILE_D2H); + timeinfo[0] = profileGaugeFixOVR.Last(QUDA_PROFILE_H2D); + timeinfo[1] = profileGaugeFixOVR.Last(QUDA_PROFILE_COMPUTE); + timeinfo[2] = profileGaugeFixOVR.Last(QUDA_PROFILE_D2H); } return 0; @@ -5612,11 +5607,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \ const unsigned int stopWtheta, QudaGaugeParam* param , double* timeinfo) { - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_TOTAL); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_TOTAL); checkGaugeParam(param); - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_INIT); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_INIT); GaugeFieldParam gParam(*param, gauge); auto *cpuGauge = new cpuGaugeField(gParam); @@ -5629,33 +5624,27 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const auto *cudaInGauge = new cudaGaugeField(gParam); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_INIT); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT); - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_H2D); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D); - //if (!param->use_resident_gauge) { // load fields onto the device cudaInGauge->loadCPUField(*cpuGauge); - /*} else { // or use resident fields already present - if (!gaugePrecise) errorQuda("No resident gauge field allocated"); - cudaInGauge = gaugePrecise; - gaugePrecise = nullptr; - } */ - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_H2D); + 
profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_H2D); // perform the update - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_COMPUTE); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_COMPUTE); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE); // copy the gauge field back to the host - GaugeFixFFTQuda.TPSTART(QUDA_PROFILE_D2H); + profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_D2H); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_D2H); - GaugeFixFFTQuda.TPSTOP(QUDA_PROFILE_TOTAL); + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_TOTAL); if (param->make_resident_gauge) { if (gaugePrecise != nullptr) delete gaugePrecise; @@ -5663,11 +5652,11 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const } else { delete cudaInGauge; } - + if (timeinfo) { - timeinfo[0] = GaugeFixFFTQuda.Last(QUDA_PROFILE_H2D); - timeinfo[1] = GaugeFixFFTQuda.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = GaugeFixFFTQuda.Last(QUDA_PROFILE_D2H); + timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D); + timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE); + timeinfo[2] = profileGaugeFixFFT.Last(QUDA_PROFILE_D2H); } return 0; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dcd7c8fa4b..6b9d21e1d5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -226,6 +226,10 @@ if(QUDA_GAUGE_ALG) add_executable(gauge_alg_test gauge_alg_test.cpp) target_link_libraries(gauge_alg_test ${TEST_LIBS}) quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS) + + add_executable(gauge_alg_ctest gauge_alg_ctest.cpp) + target_link_libraries(gauge_alg_ctest ${TEST_LIBS}) + quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS) install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) add_executable(heatbath_test heatbath_test.cpp) diff 
--git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp new file mode 100644 index 0000000000..410d2304bd --- /dev/null +++ b/tests/gauge_alg_ctest.cpp @@ -0,0 +1,274 @@ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +using namespace quda; + +class GaugeAlgTest : public ::testing::Test +{ + protected: + + QudaGaugeParam param; + + Timer a0,a1; + double2 detu; + double3 plaq; + cudaGaugeField *U; + int nsteps; + int nhbsteps; + int novrsteps; + bool coldstart; + double beta_value; + + RNG * randstates; + + + void SetReunitarizationConsts(){ + const double unitarize_eps = 1e-14; + const double max_error = 1e-10; + const int reunit_allow_svd = 1; + const int reunit_svd_only = 0; + const double svd_rel_error = 1e-6; + const double svd_abs_error = 1e-6; + setUnitarizeLinksConstants(unitarize_eps, max_error, + reunit_allow_svd, reunit_svd_only, + svd_rel_error, svd_abs_error); + + } + + bool checkDimsPartitioned() + { + if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) + return true; + return false; + } + + bool comparePlaquette(double3 a, double3 b){ + double a0,a1,a2; + a0 = std::abs(a.x - b.x); + a1 = std::abs(a.y - b.y); + a2 = std::abs(a.z - b.z); + double prec_val = 1.0e-5; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true; + return false; + } + + bool CheckDeterminant(double2 detu){ + double prec_val = 5e-8; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true; + return false; + } + + virtual void SetUp() { + setVerbosity(QUDA_VERBOSE); + + param = newQudaGaugeParam(); + + // Setup gauge container. 
+ param.cpu_prec = prec; + param.cpu_prec = prec; + param.cuda_prec = prec; + param.reconstruct = link_recon; + param.cuda_prec_sloppy = prec; + param.reconstruct_sloppy = link_recon; + + param.type = QUDA_WILSON_LINKS; + param.gauge_order = QUDA_MILC_GAUGE_ORDER; + + param.X[0] = xdim; + param.X[1] = ydim; + param.X[2] = zdim; + param.X[3] = tdim; + setDims(param.X); + + param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! + param.t_boundary = QUDA_PERIODIC_T; + param.gauge_fix = QUDA_GAUGE_FIXED_NO; + param.ga_pad = 0; + + GaugeFieldParam gParam(0, param); + gParam.pad = 0; + gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; + gParam.reconstruct = param.reconstruct; + gParam.setPrecision(gParam.Precision(), true); + +#ifdef MULTI_GPU + int y[4]; + int R[4] = {0,0,0,0}; + for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; + for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int pad = 0; + GaugeFieldParam gParamEx(y, prec, link_recon, + pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + gParamEx.create = QUDA_ZERO_FIELD_CREATE; + gParamEx.order = gParam.order; + gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; + gParamEx.t_boundary = gParam.t_boundary; + gParamEx.nFace = 1; + for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; + U = new cudaGaugeField(gParamEx); +#else + U = new cudaGaugeField(gParam); +#endif + // CURAND random generator initialization + randstates = new RNG(gParam, 1234); + randstates->Init(); + + nsteps = heatbath_num_steps; + nhbsteps = heatbath_num_heatbath_per_step; + novrsteps = heatbath_num_overrelax_per_step; + coldstart = heatbath_coldstart; + beta_value = heatbath_beta_value; + + a0.Start(__func__, __FILE__, __LINE__); + a1.Start(__func__, __FILE__, __LINE__); + + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); + int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); + + if (link_recon 
!= QUDA_RECONSTRUCT_8 && coldstart) + InitGaugeField(*U); + else + InitGaugeField(*U, *randstates); + + // Reunitarization setup + SetReunitarizationConsts(); + plaquette(*U); + + for(int step=1; step<=nsteps; ++step){ + printfQuda("Step %d\n",step); + Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); + + //Reunitarize gauge links... + *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + + plaquette(*U); + } + a1.Stop(__func__, __FILE__, __LINE__); + + printfQuda("Time Monte -> %.6f s\n", a1.Last()); + plaq = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + + host_free(num_failures_h); + } + + virtual void TearDown() { + detu = getLinkDeterminant(*U); + double2 tru = getLinkTrace(*U); + printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); + printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0); + + delete U; + //Release all temporary memory used for data exchange between GPUs in multi-GPU mode + PGaugeExchangeFree(); + + a0.Stop(__func__, __FILE__, __LINE__); + printfQuda("Time -> %.6f s\n", a0.Last()); + randstates->Release(); + delete randstates; + } +}; + +TEST_F(GaugeAlgTest, Generation) +{ + detu = getLinkDeterminant(*U); + ASSERT_TRUE(CheckDeterminant(detu)); +} + +TEST_F(GaugeAlgTest, Landau_Overrelaxation) +{ + printfQuda("Landau gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); +} + +TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) +{ + printfQuda("Coulomb gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + auto 
plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); +} + +TEST_F(GaugeAlgTest, Landau_FFT) +{ + if (!checkDimsPartitioned()) { + printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } +} + +TEST_F(GaugeAlgTest, Coulomb_FFT) +{ + if (!checkDimsPartitioned()) { + printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } +} + +int main(int argc, char **argv) +{ + // initalize google test, includes command line options + ::testing::InitGoogleTest(&argc, argv); + // return code for google test + int test_rc = 0; + xdim=ydim=zdim=tdim=32; + + // command line options + auto app = make_app(); + add_gaugefix_option_group(app); + add_heatbath_option_group(app); + try { + app->parse(argc, argv); + } catch (const CLI::ParseError &e) { + return app->exit(e); + } + + initComms(argc, argv, gridsize_from_cmdline); + + // Ensure gtest prints only from rank 0 + ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); + if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } + + initQuda(device_ordinal); + test_rc = RUN_ALL_TESTS(); + endQuda(); + + finalizeComms(); + + return test_rc; +} diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index 46a0f92dd7..d97f4c41bf 100644 --- a/tests/gauge_alg_test.cpp 
+++ b/tests/gauge_alg_test.cpp @@ -329,50 +329,32 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) } } -TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) +bool checkDimsPartitioned() { - if (execute) { - printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); - } + if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) + return true; + return false; } -TEST_F(GaugeAlgTest, Landau_FFT) +bool comparePlaquette(double3 a, double3 b) { - if (execute) { - if (!comm_partitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); - } - } + printfQuda("Plaq: %.16e, %.16e, %.16e\n", a.x, a.y, a.z); + printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); + double a0,a1,a2; + a0 = std::abs(a.x - b.x); + a1 = std::abs(a.y - b.y); + a2 = std::abs(a.z - b.z); + double prec_val = 1.0e-5; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } -TEST_F(GaugeAlgTest, Coulomb_FFT) +bool checkDeterminant(double2 detu) { - if (execute) { - if (!comm_partitioned()) { - printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, 
gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); - } - } + printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y); + double prec_val = 5e-8; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val; } int main(int argc, char **argv) @@ -443,4 +425,5 @@ int main(int argc, char **argv) finalizeComms(); return test_rc; + } diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp index 413d73541d..8ff7d5128f 100644 --- a/tests/su3_test.cpp +++ b/tests/su3_test.cpp @@ -85,26 +85,22 @@ int main(int argc, char **argv) setWilsonGaugeParam(gauge_param); gauge_param.t_boundary = QUDA_PERIODIC_T; setDims(gauge_param.X); - + + // All user inputs now defined + display_test_info(); + + // *** QUDA parameters begin here. 
void *gauge[4], *new_gauge[4]; - for (int dir = 0; dir < 4; dir++) { gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); } - - initQuda(device_ordinal); - - setVerbosity(verbosity); - - // call srand() with a rank-dependent seed - initRand(); - + constructHostGaugeField(gauge, gauge_param, argc, argv); // Load the gauge field to the device loadGaugeQuda((void *)gauge, &gauge_param); saveGaugeQuda(new_gauge, &gauge_param); - + double plaq[3]; plaqQuda(plaq); printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1], @@ -112,9 +108,6 @@ int main(int argc, char **argv) #ifdef GPU_GAUGE_TOOLS - // All user inputs now defined - display_test_info(); - // Topological charge and gauge energy double q_charge_check = 0.0; // Size of floating point data From 01e2726404a8dff2a5d2d9c7827109423683e1b3 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 30 Apr 2021 18:48:50 -0700 Subject: [PATCH 09/32] Move the gauge_alg_test to a ctest, make a new interface to the gauge fixing that allows for fine grained control and gauge IO. 
--- tests/gauge_alg_test.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index d97f4c41bf..b88e4aa7b7 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -11,6 +11,7 @@ #include #include #include + #include #include @@ -422,8 +423,7 @@ int main(int argc, char **argv) endQuda(); - finalizeComms(); - - return test_rc; - + finalizeComms() + + return test_rc; } From a4fee79136b7bb8201480c6dcea77ffca33b9e1c Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 30 Apr 2021 18:54:13 -0700 Subject: [PATCH 10/32] clang tidy --- lib/interface_quda.cpp | 7 ++-- tests/gauge_alg_ctest.cpp | 80 ++++++++++++++++++++------------------- tests/gauge_alg_test.cpp | 4 +- tests/heatbath_test.cpp | 2 +- tests/su3_test.cpp | 13 ++++--- 5 files changed, 56 insertions(+), 50 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index d873a89e72..7f75793180 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5541,7 +5541,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u double *timeinfo) { profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL); - + checkGaugeParam(param); profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT); @@ -5579,7 +5579,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); } - + // Copy the gauge field back to the host profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); @@ -5623,7 +5623,6 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const gParam.setPrecision(gParam.Precision(), true); auto *cudaInGauge = new cudaGaugeField(gParam); - profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT); profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D); @@ -5652,7 +5651,7 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const } else { delete 
cudaInGauge; } - + if (timeinfo) { timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D); timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE); diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index 410d2304bd..cdaa2efd9a 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -22,12 +22,11 @@ using namespace quda; class GaugeAlgTest : public ::testing::Test -{ - protected: - +{ +protected: QudaGaugeParam param; - - Timer a0,a1; + + Timer a0, a1; double2 detu; double3 plaq; cudaGaugeField *U; @@ -37,20 +36,17 @@ class GaugeAlgTest : public ::testing::Test bool coldstart; double beta_value; - RNG * randstates; + RNG *randstates; - - void SetReunitarizationConsts(){ + void SetReunitarizationConsts() + { const double unitarize_eps = 1e-14; const double max_error = 1e-10; const int reunit_allow_svd = 1; - const int reunit_svd_only = 0; + const int reunit_svd_only = 0; const double svd_rel_error = 1e-6; const double svd_abs_error = 1e-6; - setUnitarizeLinksConstants(unitarize_eps, max_error, - reunit_allow_svd, reunit_svd_only, - svd_rel_error, svd_abs_error); - + setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error); } bool checkDimsPartitioned() @@ -60,8 +56,9 @@ class GaugeAlgTest : public ::testing::Test return false; } - bool comparePlaquette(double3 a, double3 b){ - double a0,a1,a2; + bool comparePlaquette(double3 a, double3 b) + { + double a0, a1, a2; a0 = std::abs(a.x - b.x); a1 = std::abs(a.y - b.y); a2 = std::abs(a.z - b.z); @@ -71,14 +68,16 @@ class GaugeAlgTest : public ::testing::Test return false; } - bool CheckDeterminant(double2 detu){ + bool CheckDeterminant(double2 detu) + { double prec_val = 5e-8; if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true; return false; } - virtual void SetUp() { + virtual void SetUp() + { setVerbosity(QUDA_VERBOSE); param = 
newQudaGaugeParam(); @@ -100,7 +99,7 @@ class GaugeAlgTest : public ::testing::Test param.X[3] = tdim; setDims(param.X); - param.anisotropy = 1.0; //don't support anisotropy for now!!!!!! + param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! param.t_boundary = QUDA_PERIODIC_T; param.gauge_fix = QUDA_GAUGE_FIXED_NO; param.ga_pad = 0; @@ -108,25 +107,25 @@ class GaugeAlgTest : public ::testing::Test GaugeFieldParam gParam(0, param); gParam.pad = 0; gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); #ifdef MULTI_GPU int y[4]; - int R[4] = {0,0,0,0}; - for(int dir=0; dir<4; ++dir) if(comm_dim_partitioned(dir)) R[dir] = 2; - for(int dir=0; dir<4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int R[4] = {0, 0, 0, 0}; + for (int dir = 0; dir < 4; ++dir) + if (comm_dim_partitioned(dir)) R[dir] = 2; + for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; int pad = 0; - GaugeFieldParam gParamEx(y, prec, link_recon, - pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); gParamEx.create = QUDA_ZERO_FIELD_CREATE; gParamEx.order = gParam.order; gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; gParamEx.t_boundary = gParam.t_boundary; gParamEx.nFace = 1; - for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; + for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir]; U = new cudaGaugeField(gParamEx); #else U = new cudaGaugeField(gParam); @@ -156,11 +155,11 @@ class GaugeAlgTest : public ::testing::Test SetReunitarizationConsts(); plaquette(*U); - for(int step=1; step<=nsteps; ++step){ - printfQuda("Step %d\n",step); + for (int step = 1; step <= nsteps; ++step) { + printfQuda("Step %d\n", step); Monte(*U, 
*randstates, beta_value, nhbsteps, novrsteps); - //Reunitarize gauge links... + // Reunitarize gauge links... *num_failures_h = 0; unitarizeLinks(*U, num_failures_d); qudaDeviceSynchronize(); @@ -177,14 +176,15 @@ class GaugeAlgTest : public ::testing::Test host_free(num_failures_h); } - virtual void TearDown() { + virtual void TearDown() + { detu = getLinkDeterminant(*U); double2 tru = getLinkTrace(*U); printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); - printfQuda("Tr: %.16e:%.16e\n", tru.x/3.0, tru.y/3.0); + printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); delete U; - //Release all temporary memory used for data exchange between GPUs in multi-GPU mode + // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); a0.Stop(__func__, __FILE__, __LINE__); @@ -203,7 +203,8 @@ TEST_F(GaugeAlgTest, Generation) TEST_F(GaugeAlgTest, Landau_Overrelaxation) { printfQuda("Landau gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -212,7 +213,8 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); + gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, 
plaq_gf)); @@ -222,7 +224,8 @@ TEST_F(GaugeAlgTest, Landau_FFT) { if (!checkDimsPartitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -233,7 +236,8 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) { if (!checkDimsPartitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -246,9 +250,9 @@ int main(int argc, char **argv) ::testing::InitGoogleTest(&argc, argv); // return code for google test int test_rc = 0; - xdim=ydim=zdim=tdim=32; + xdim = ydim = zdim = tdim = 32; - // command line options + // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index b88e4aa7b7..5728dd1141 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -340,8 +340,8 @@ bool checkDimsPartitioned() bool comparePlaquette(double3 a, double3 b) { printfQuda("Plaq: %.16e, %.16e, %.16e\n", a.x, a.y, a.z); - printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); - double a0,a1,a2; + printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); + double a0, a1, a2; a0 = std::abs(a.x - b.x); a1 = 
std::abs(a.y - b.y); a2 = std::abs(a.z - b.z); diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp index 37588df0ba..e3aa2cb349 100644 --- a/tests/heatbath_test.cpp +++ b/tests/heatbath_test.cpp @@ -53,7 +53,7 @@ void display_test_info() } int main(int argc, char **argv) -{ +{ // command line options auto app = make_app(); add_heatbath_option_group(app); diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp index 8ff7d5128f..d2a540ae4d 100644 --- a/tests/su3_test.cpp +++ b/tests/su3_test.cpp @@ -82,25 +82,28 @@ int main(int argc, char **argv) if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + initQuda(device_ordinal); + setVerbosity(verbosity); + setWilsonGaugeParam(gauge_param); gauge_param.t_boundary = QUDA_PERIODIC_T; setDims(gauge_param.X); - + // All user inputs now defined display_test_info(); - - // *** QUDA parameters begin here. + + // *** QUDA parameters begin here. void *gauge[4], *new_gauge[4]; for (int dir = 0; dir < 4; dir++) { gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); } - + constructHostGaugeField(gauge, gauge_param, argc, argv); // Load the gauge field to the device loadGaugeQuda((void *)gauge, &gauge_param); saveGaugeQuda(new_gauge, &gauge_param); - + double plaq[3]; plaqQuda(plaq); printfQuda("Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e)\n", plaq[0], plaq[1], From 15b7e4c30cfd58d5a629e9c664c770c5f1e89cc0 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Sat, 1 May 2021 17:59:56 -0700 Subject: [PATCH 11/32] Allow for single case testing in gauge_alg_ctest, minor clean up of gauge fixing stdout and comments --- lib/gauge_fix_fft.cu | 2 +- tests/CMakeLists.txt | 10 +- tests/gauge_alg_ctest.cpp | 443 +++++++++++++++++++++++++------------- 3 files changed, 298 insertions(+), 157 deletions(-) diff --git 
a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu index ac991ea63c..23f924d28b 100644 --- a/lib/gauge_fix_fft.cu +++ b/lib/gauge_fix_fft.cu @@ -193,7 +193,7 @@ namespace quda { if (getVerbosity() >= QUDA_SUMMARIZE) { printfQuda("\tAuto tune active: %s\n", autotune ? "true" : "false"); printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha0); - printfQuda("\tTolerance: %lf\n", tolerance); + printfQuda("\tTolerance: %e\n", tolerance); printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta"); printfQuda("\tMaximum number of iterations: %d\n", Nsteps); printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6b9d21e1d5..764cc6af97 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -223,14 +223,10 @@ if(QUDA_FORCE_GAUGE) endif() if(QUDA_GAUGE_ALG) - add_executable(gauge_alg_test gauge_alg_test.cpp) - target_link_libraries(gauge_alg_test ${TEST_LIBS}) - quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS) - add_executable(gauge_alg_ctest gauge_alg_ctest.cpp) target_link_libraries(gauge_alg_ctest ${TEST_LIBS}) quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS) - install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) + install(TARGETS gauge_alg_ctest ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) add_executable(heatbath_test heatbath_test.cpp) target_link_libraries(heatbath_test ${TEST_LIBS}) @@ -816,8 +812,8 @@ foreach(prec IN LISTS TEST_PRECS) if(QUDA_GAUGE_ALG) add_test(NAME gauge_alg_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dim 4 6 8 10 --prec ${prec} + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dim 2 4 6 8 --prec ${prec} --gtest_output=xml:gauge_arg_test_${prec}.xml) endif() diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index cdaa2efd9a..881d53bb76 100644 --- a/tests/gauge_alg_ctest.cpp +++ 
b/tests/gauge_alg_ctest.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,42 @@ using namespace quda; +//***********************************************************// +// This boolean controls whether or not the full Google test // +// is done. If the user passes a value of 1 or 2 to --test // +// then a single instance of OVR or FFT gauge fixing is done // +// and the value of this bool is set to false. Otherwise the // +// Google tests are performed. // +//***********************************************************// +bool execute = true; + +void display_test_info() +{ + printfQuda("running the following test:\n"); + + switch (test_type) { + case 0: + printfQuda("\n Google testing\n"); + break; + case 1: + printfQuda("\nOVR gauge fix\n"); + break; + case 2: + printfQuda("\nFFT gauge fix\n"); + break; + default: errorQuda("Undefined test type %d given", test_type); + } + + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); + printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), + get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, + tdim, Lsdim); + + printfQuda("Grid partition info: X Y Z T\n"); + printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), + dimPartitioned(3)); +} + class GaugeAlgTest : public ::testing::Test { protected: @@ -35,7 +72,9 @@ class GaugeAlgTest : public ::testing::Test int novrsteps; bool coldstart; double beta_value; - + + bool unit_test; + RNG *randstates; void SetReunitarizationConsts() @@ -64,45 +103,195 @@ class GaugeAlgTest : public ::testing::Test a2 = std::abs(a.z - b.z); double prec_val = 1.0e-5; if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - if ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)) return true; - return false; + return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } bool CheckDeterminant(double2 detu) { double 
prec_val = 5e-8; if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - if (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val) return true; - return false; + return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val); } virtual void SetUp() { - setVerbosity(QUDA_VERBOSE); - - param = newQudaGaugeParam(); - - // Setup gauge container. - param.cpu_prec = prec; - param.cpu_prec = prec; - param.cuda_prec = prec; - param.reconstruct = link_recon; - param.cuda_prec_sloppy = prec; - param.reconstruct_sloppy = link_recon; - - param.type = QUDA_WILSON_LINKS; - param.gauge_order = QUDA_MILC_GAUGE_ORDER; - - param.X[0] = xdim; - param.X[1] = ydim; - param.X[2] = zdim; - param.X[3] = tdim; - setDims(param.X); + if(execute) { + setVerbosity(QUDA_VERBOSE); + param = newQudaGaugeParam(); + + // Setup gauge container. + param.cpu_prec = prec; + param.cpu_prec = prec; + param.cuda_prec = prec; + param.reconstruct = link_recon; + param.cuda_prec_sloppy = prec; + param.reconstruct_sloppy = link_recon; + + param.type = QUDA_WILSON_LINKS; + param.gauge_order = QUDA_MILC_GAUGE_ORDER; + + param.X[0] = xdim; + param.X[1] = ydim; + param.X[2] = zdim; + param.X[3] = tdim; + setDims(param.X); + + param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! 
+ param.t_boundary = QUDA_PERIODIC_T; + param.gauge_fix = QUDA_GAUGE_FIXED_NO; + param.ga_pad = 0; + + GaugeFieldParam gParam(0, param); + gParam.pad = 0; + gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.link_type = param.type; + gParam.reconstruct = param.reconstruct; + gParam.setPrecision(gParam.Precision(), true); + +#ifdef MULTI_GPU + int y[4]; + int R[4] = {0, 0, 0, 0}; + for (int dir = 0; dir < 4; ++dir) + if (comm_dim_partitioned(dir)) R[dir] = 2; + for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; + int pad = 0; + GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); + gParamEx.create = QUDA_ZERO_FIELD_CREATE; + gParamEx.order = gParam.order; + gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; + gParamEx.t_boundary = gParam.t_boundary; + gParamEx.nFace = 1; + for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir]; + U = new cudaGaugeField(gParamEx); +#else + U = new cudaGaugeField(gParam); +#endif + // CURAND random generator initialization + randstates = new RNG(gParam, 1234); + randstates->Init(); + + nsteps = heatbath_num_steps; + nhbsteps = heatbath_num_heatbath_per_step; + novrsteps = heatbath_num_overrelax_per_step; + coldstart = heatbath_coldstart; + beta_value = heatbath_beta_value; + + a0.Start(__func__, __FILE__, __LINE__); + a1.Start(__func__, __FILE__, __LINE__); + + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); + int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); + + if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) + InitGaugeField(*U); + else + InitGaugeField(*U, *randstates); + + // Reunitarization setup + SetReunitarizationConsts(); + plaquette(*U); + + for (int step = 1; step <= nsteps; ++step) { + printfQuda("Step %d\n", step); + Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); + + // Reunitarize gauge links... 
+ *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + + plaquette(*U); + } + a1.Stop(__func__, __FILE__, __LINE__); + + printfQuda("Time Monte -> %.6f s\n", a1.Last()); + plaq = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + + host_free(num_failures_h); + + // If a specific test type is requested, perfrom it now and then + // turn off all Google tests in the tear down. + switch (test_type) { + case 0: + // Do the Google testing + break; + case 1: + run_ovr(); + break; + case 2: + run_fft(); + break; + default: + errorQuda("Invalid test type %d ", test_type); + } + } + } + + virtual void TearDown() + { + if(execute) { + detu = getLinkDeterminant(*U); + double2 tru = getLinkTrace(*U); + printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); + printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); + + delete U; + // Release all temporary memory used for data exchange between GPUs in multi-GPU mode + PGaugeExchangeFree(); + + a0.Stop(__func__, __FILE__, __LINE__); + printfQuda("Time -> %.6f s\n", a0.Last()); + randstates->Release(); + delete randstates; + } + // If we performed a specific instance, switch off the + // Google testing. 
+ if(test_type != 0) execute = false; + } + + virtual void run_ovr() + { + if(execute) { + gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + // Save if output string is specified + if (strcmp(gauge_outfile, "")) save_gauge(); + } + } + virtual void run_fft() + { + if(execute) { + if (!checkDimsPartitioned()) { + printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); + + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + // Save if output string is specified + if (strcmp(gauge_outfile, "")) save_gauge(); + } else { + errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); + } + } + } - param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! 
- param.t_boundary = QUDA_PERIODIC_T; - param.gauge_fix = QUDA_GAUGE_FIXED_NO; - param.ga_pad = 0; + virtual void save_gauge() { + + printfQuda("Saving the gauge field to file %s\n", gauge_outfile); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); + setWilsonGaugeParam(gauge_param); + + void *cpu_gauge[4]; + for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } GaugeFieldParam gParam(0, param); gParam.pad = 0; @@ -111,168 +300,124 @@ class GaugeAlgTest : public ::testing::Test gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); - -#ifdef MULTI_GPU - int y[4]; - int R[4] = {0, 0, 0, 0}; - for (int dir = 0; dir < 4; ++dir) - if (comm_dim_partitioned(dir)) R[dir] = 2; - for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; - int pad = 0; - GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); - gParamEx.create = QUDA_ZERO_FIELD_CREATE; - gParamEx.order = gParam.order; - gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; - gParamEx.t_boundary = gParam.t_boundary; - gParamEx.nFace = 1; - for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir]; - U = new cudaGaugeField(gParamEx); -#else - U = new cudaGaugeField(gParam); -#endif - // CURAND random generator initialization - randstates = new RNG(gParam, 1234); - randstates->Init(); - - nsteps = heatbath_num_steps; - nhbsteps = heatbath_num_heatbath_per_step; - novrsteps = heatbath_num_overrelax_per_step; - coldstart = heatbath_coldstart; - beta_value = heatbath_beta_value; - - a0.Start(__func__, __FILE__, __LINE__); - a1.Start(__func__, __FILE__, __LINE__); - - int *num_failures_h = (int *)mapped_malloc(sizeof(int)); - int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); - - if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) - InitGaugeField(*U); - else - InitGaugeField(*U, *randstates); - - // Reunitarization setup - 
SetReunitarizationConsts(); - plaquette(*U); - - for (int step = 1; step <= nsteps; ++step) { - printfQuda("Step %d\n", step); - Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - - // Reunitarize gauge links... - *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); - - plaquette(*U); - } - a1.Stop(__func__, __FILE__, __LINE__); - - printfQuda("Time Monte -> %.6f s\n", a1.Last()); - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - - host_free(num_failures_h); - } - - virtual void TearDown() - { - detu = getLinkDeterminant(*U); - double2 tru = getLinkTrace(*U); - printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); - printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); - - delete U; - // Release all temporary memory used for data exchange between GPUs in multi-GPU mode - PGaugeExchangeFree(); - - a0.Stop(__func__, __FILE__, __LINE__); - printfQuda("Time -> %.6f s\n", a0.Last()); - randstates->Release(); - delete randstates; + + cudaGaugeField *gauge; + gauge = new cudaGaugeField(gParam); + + // copy into regular field + copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); + saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); + + // Write to disk + write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0); + + for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]); + delete gauge; } }; - + TEST_F(GaugeAlgTest, Generation) { - detu = getLinkDeterminant(*U); - ASSERT_TRUE(CheckDeterminant(detu)); + if(execute) { + detu = getLinkDeterminant(*U); + ASSERT_TRUE(CheckDeterminant(detu)); + } } TEST_F(GaugeAlgTest, Landau_Overrelaxation) { - printfQuda("Landau gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); - auto plaq_gf = 
plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if(execute) { + printfQuda("Landau gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } } TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { - printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if(execute) { + printfQuda("Coulomb gauge fixing with overrelaxation\n"); + gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } } TEST_F(GaugeAlgTest, Landau_FFT) { - if (!checkDimsPartitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if(execute) { + if (!checkDimsPartitioned()) { + printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); + auto 
plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } } } TEST_F(GaugeAlgTest, Coulomb_FFT) { - if (!checkDimsPartitioned()) { - printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + if(execute) { + if (!checkDimsPartitioned()) { + printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + } } } int main(int argc, char **argv) { - // initalize google test, includes command line options - ::testing::InitGoogleTest(&argc, argv); - // return code for google test - int test_rc = 0; - xdim = ydim = zdim = tdim = 32; - // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); + + test_type = 0; + CLI::TransformPairs test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}}; + app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { return app->exit(e); } + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); + // call srand() with a rank-dependent seed + initRand(); + + display_test_info(); + + // 
initialize the QUDA library + initQuda(device_ordinal); + + // initalize google test, includes command line options + ::testing::InitGoogleTest(&argc, argv); + // Ensure gtest prints only from rank 0 ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } - initQuda(device_ordinal); - test_rc = RUN_ALL_TESTS(); - endQuda(); - + // return code for google test + int test_rc = RUN_ALL_TESTS(); + + endQuda(); finalizeComms(); - + return test_rc; } From fe83fa846abd57505fa40461c0938ee48a91817a Mon Sep 17 00:00:00 2001 From: cpviolator Date: Sat, 1 May 2021 18:01:05 -0700 Subject: [PATCH 12/32] Clang tidy --- include/quda.h | 3 +- include/quda_milc_interface.h | 8 +- tests/gauge_alg_ctest.cpp | 180 ++++++++++++++++------------------ 3 files changed, 91 insertions(+), 100 deletions(-) diff --git a/include/quda.h b/include/quda.h index 95952a648c..63d09b8992 100644 --- a/include/quda.h +++ b/include/quda.h @@ -1510,7 +1510,8 @@ extern "C" { * @param[in] Nsteps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
- * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps + * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when + * iteration reachs the maximum number of steps defined by Nsteps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in] param The parameters of the external fields and the computation settings diff --git a/include/quda_milc_interface.h b/include/quda_milc_interface.h index 2bc8b5900e..1f45d1bae7 100644 --- a/include/quda_milc_interface.h +++ b/include/quda_milc_interface.h @@ -947,7 +947,6 @@ extern "C" { */ void qudaDestroyGaugeField(void* gauge); - /** * @brief Gauge fixing with overrelaxation with support for single and multi GPU. * @param[in] precision, 1 for single precision else for double precision @@ -955,7 +954,8 @@ extern "C" { * @param[in] Nsteps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
- * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps + * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when + * iteration reachs the maximum number of steps defined by Nsteps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in,out] milc_sitelink, MILC gauge field to be fixed @@ -971,7 +971,6 @@ extern "C" { void* milc_sitelink ); - /** * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. * @param[in] precision, 1 for single precision else for double precision @@ -980,7 +979,8 @@ extern "C" { * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps + * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when + * iteration reachs the maximum number of steps defined by Nsteps * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in,out] milc_sitelink, MILC gauge field to be fixed */ diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index 881d53bb76..f797420c9c 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -36,18 +36,12 @@ void display_test_info() printfQuda("running the following test:\n"); switch (test_type) { - case 0: - printfQuda("\n Google testing\n"); - break; - case 1: - printfQuda("\nOVR 
gauge fix\n"); - break; - case 2: - printfQuda("\nFFT gauge fix\n"); - break; + case 0: printfQuda("\n Google testing\n"); break; + case 1: printfQuda("\nOVR gauge fix\n"); break; + case 2: printfQuda("\nFFT gauge fix\n"); break; default: errorQuda("Undefined test type %d given", test_type); } - + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, @@ -72,9 +66,9 @@ class GaugeAlgTest : public ::testing::Test int novrsteps; bool coldstart; double beta_value; - + bool unit_test; - + RNG *randstates; void SetReunitarizationConsts() @@ -115,10 +109,10 @@ class GaugeAlgTest : public ::testing::Test virtual void SetUp() { - if(execute) { + if (execute) { setVerbosity(QUDA_VERBOSE); param = newQudaGaugeParam(); - + // Setup gauge container. param.cpu_prec = prec; param.cpu_prec = prec; @@ -126,21 +120,21 @@ class GaugeAlgTest : public ::testing::Test param.reconstruct = link_recon; param.cuda_prec_sloppy = prec; param.reconstruct_sloppy = link_recon; - + param.type = QUDA_WILSON_LINKS; param.gauge_order = QUDA_MILC_GAUGE_ORDER; - + param.X[0] = xdim; param.X[1] = ydim; param.X[2] = zdim; param.X[3] = tdim; setDims(param.X); - + param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! 
param.t_boundary = QUDA_PERIODIC_T; param.gauge_fix = QUDA_GAUGE_FIXED_NO; param.ga_pad = 0; - + GaugeFieldParam gParam(0, param); gParam.pad = 0; gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; @@ -148,12 +142,12 @@ class GaugeAlgTest : public ::testing::Test gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); - + #ifdef MULTI_GPU int y[4]; int R[4] = {0, 0, 0, 0}; for (int dir = 0; dir < 4; ++dir) - if (comm_dim_partitioned(dir)) R[dir] = 2; + if (comm_dim_partitioned(dir)) R[dir] = 2; for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; int pad = 0; GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); @@ -170,93 +164,88 @@ class GaugeAlgTest : public ::testing::Test // CURAND random generator initialization randstates = new RNG(gParam, 1234); randstates->Init(); - + nsteps = heatbath_num_steps; nhbsteps = heatbath_num_heatbath_per_step; novrsteps = heatbath_num_overrelax_per_step; coldstart = heatbath_coldstart; beta_value = heatbath_beta_value; - + a0.Start(__func__, __FILE__, __LINE__); a1.Start(__func__, __FILE__, __LINE__); - + int *num_failures_h = (int *)mapped_malloc(sizeof(int)); int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); - + if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) - InitGaugeField(*U); + InitGaugeField(*U); else - InitGaugeField(*U, *randstates); - + InitGaugeField(*U, *randstates); + // Reunitarization setup SetReunitarizationConsts(); plaquette(*U); - + for (int step = 1; step <= nsteps; ++step) { - printfQuda("Step %d\n", step); - Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - - // Reunitarize gauge links... 
- *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); - - plaquette(*U); + printfQuda("Step %d\n", step); + Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); + + // Reunitarize gauge links... + *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + + plaquette(*U); } a1.Stop(__func__, __FILE__, __LINE__); - + printfQuda("Time Monte -> %.6f s\n", a1.Last()); plaq = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - + host_free(num_failures_h); // If a specific test type is requested, perfrom it now and then // turn off all Google tests in the tear down. switch (test_type) { case 0: - // Do the Google testing - break; - case 1: - run_ovr(); - break; - case 2: - run_fft(); - break; - default: - errorQuda("Invalid test type %d ", test_type); + // Do the Google testing + break; + case 1: run_ovr(); break; + case 2: run_fft(); break; + default: errorQuda("Invalid test type %d ", test_type); } } } - + virtual void TearDown() { - if(execute) { + if (execute) { detu = getLinkDeterminant(*U); double2 tru = getLinkTrace(*U); printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); - + delete U; // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); - + a0.Stop(__func__, __FILE__, __LINE__); printfQuda("Time -> %.6f s\n", a0.Last()); randstates->Release(); - delete randstates; + delete randstates; } // If we performed a specific instance, switch off the // Google testing. 
- if(test_type != 0) execute = false; + if (test_type != 0) execute = false; } - + virtual void run_ovr() { - if(execute) { - gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); + if (execute) { + gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, + gf_reunit_interval, gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -266,30 +255,31 @@ class GaugeAlgTest : public ::testing::Test } virtual void run_fft() { - if(execute) { + if (execute) { if (!checkDimsPartitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - // Save if output string is specified - if (strcmp(gauge_outfile, "")) save_gauge(); + printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); + gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gf_theta_condition); + + auto plaq_gf = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + // Save if output string is specified + if (strcmp(gauge_outfile, "")) save_gauge(); } else { - errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); + errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); } } } - virtual void save_gauge() { - + virtual void save_gauge() + { + printfQuda("Saving the gauge field to file %s\n", gauge_outfile); - + QudaGaugeParam gauge_param = 
newQudaGaugeParam(); setWilsonGaugeParam(gauge_param); - + void *cpu_gauge[4]; for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } @@ -300,25 +290,25 @@ class GaugeAlgTest : public ::testing::Test gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); - + cudaGaugeField *gauge; gauge = new cudaGaugeField(gParam); - + // copy into regular field copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); - + // Write to disk write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0); - + for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]); delete gauge; } }; - + TEST_F(GaugeAlgTest, Generation) { - if(execute) { + if (execute) { detu = getLinkDeterminant(*U); ASSERT_TRUE(CheckDeterminant(detu)); } @@ -326,10 +316,10 @@ TEST_F(GaugeAlgTest, Generation) TEST_F(GaugeAlgTest, Landau_Overrelaxation) { - if(execute) { + if (execute) { printfQuda("Landau gauge fixing with overrelaxation\n"); gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -338,10 +328,10 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { - if(execute) { + if (execute) { printfQuda("Coulomb gauge fixing with overrelaxation\n"); gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -350,11 +340,11 @@ TEST_F(GaugeAlgTest, 
Coulomb_Overrelaxation) TEST_F(GaugeAlgTest, Landau_FFT) { - if(execute) { + if (execute) { if (!checkDimsPartitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -364,11 +354,11 @@ TEST_F(GaugeAlgTest, Landau_FFT) TEST_F(GaugeAlgTest, Coulomb_FFT) { - if(execute) { + if (execute) { if (!checkDimsPartitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); + gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -382,7 +372,7 @@ int main(int argc, char **argv) auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); - + test_type = 0; CLI::TransformPairs test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}}; app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); @@ -395,12 +385,12 @@ int main(int argc, char **argv) if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); // call srand() with a rank-dependent seed initRand(); - + display_test_info(); // initialize the QUDA library @@ -415,9 +405,9 @@ int main(int argc, char **argv) // return code for google test int test_rc = RUN_ALL_TESTS(); - - endQuda(); + + 
endQuda(); finalizeComms(); - + return test_rc; } From 59c3beb0507301a3e466054b403c2365587bf656 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Thu, 5 Aug 2021 14:35:27 -0700 Subject: [PATCH 13/32] Rebase to GK --- lib/interface_quda.cpp | 2 +- tests/gauge_alg_ctest.cpp | 330 +++++++++++++++-------------- tests/gauge_alg_test.cpp | 429 -------------------------------------- 3 files changed, 174 insertions(+), 587 deletions(-) delete mode 100644 tests/gauge_alg_test.cpp diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 7f75793180..73cdb76b45 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5545,7 +5545,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u checkGaugeParam(param); profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT); - GaugeFieldParam gParam(gauge, *param); + GaugeFieldParam gParam(*param); auto *cpuGauge = new cpuGaugeField(gParam); // gParam.pad = getFatLinkPadding(param->X); diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index f797420c9c..0bfeaa8189 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -10,7 +10,10 @@ #include #include #include +#include + #include +#include #include #include @@ -31,6 +34,11 @@ using namespace quda; //***********************************************************// bool execute = true; +bool gauge_load; +bool gauge_store; + +void *host_gauge[4]; + void display_test_info() { printfQuda("running the following test:\n"); @@ -57,10 +65,10 @@ class GaugeAlgTest : public ::testing::Test protected: QudaGaugeParam param; - Timer a0, a1; + Timer a0, a1; double2 detu; double3 plaq; - cudaGaugeField *U; + GaugeField *U; int nsteps; int nhbsteps; int novrsteps; @@ -69,8 +77,6 @@ class GaugeAlgTest : public ::testing::Test bool unit_test; - RNG *randstates; - void SetReunitarizationConsts() { const double unitarize_eps = 1e-14; @@ -82,13 +88,6 @@ class GaugeAlgTest : public ::testing::Test setUnitarizeLinksConstants(unitarize_eps, 
max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error); } - bool checkDimsPartitioned() - { - if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) - return true; - return false; - } - bool comparePlaquette(double3 a, double3 b) { double a0, a1, a2; @@ -96,14 +95,14 @@ class GaugeAlgTest : public ::testing::Test a1 = std::abs(a.y - b.y); a2 = std::abs(a.z - b.z); double prec_val = 1.0e-5; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance*1e2; return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } bool CheckDeterminant(double2 detu) { double prec_val = 5e-8; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance*1e2; return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val); } @@ -114,108 +113,111 @@ class GaugeAlgTest : public ::testing::Test param = newQudaGaugeParam(); // Setup gauge container. - param.cpu_prec = prec; - param.cpu_prec = prec; - param.cuda_prec = prec; - param.reconstruct = link_recon; - param.cuda_prec_sloppy = prec; - param.reconstruct_sloppy = link_recon; - - param.type = QUDA_WILSON_LINKS; - param.gauge_order = QUDA_MILC_GAUGE_ORDER; - - param.X[0] = xdim; - param.X[1] = ydim; - param.X[2] = zdim; - param.X[3] = tdim; - setDims(param.X); - - param.anisotropy = 1.0; // don't support anisotropy for now!!!!!! 
+ setWilsonGaugeParam(param); param.t_boundary = QUDA_PERIODIC_T; - param.gauge_fix = QUDA_GAUGE_FIXED_NO; - param.ga_pad = 0; - - GaugeFieldParam gParam(0, param); - gParam.pad = 0; - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; - gParam.reconstruct = param.reconstruct; - gParam.setPrecision(gParam.Precision(), true); - -#ifdef MULTI_GPU - int y[4]; - int R[4] = {0, 0, 0, 0}; - for (int dir = 0; dir < 4; ++dir) - if (comm_dim_partitioned(dir)) R[dir] = 2; - for (int dir = 0; dir < 4; ++dir) y[dir] = param.X[dir] + 2 * R[dir]; - int pad = 0; - GaugeFieldParam gParamEx(y, prec, link_recon, pad, QUDA_VECTOR_GEOMETRY, QUDA_GHOST_EXCHANGE_EXTENDED); - gParamEx.create = QUDA_ZERO_FIELD_CREATE; - gParamEx.order = gParam.order; - gParamEx.siteSubset = QUDA_FULL_SITE_SUBSET; - gParamEx.t_boundary = gParam.t_boundary; - gParamEx.nFace = 1; - for (int dir = 0; dir < 4; ++dir) gParamEx.r[dir] = R[dir]; - U = new cudaGaugeField(gParamEx); -#else - U = new cudaGaugeField(gParam); -#endif - // CURAND random generator initialization - randstates = new RNG(gParam, 1234); - randstates->Init(); - - nsteps = heatbath_num_steps; - nhbsteps = heatbath_num_heatbath_per_step; - novrsteps = heatbath_num_overrelax_per_step; - coldstart = heatbath_coldstart; - beta_value = heatbath_beta_value; - - a0.Start(__func__, __FILE__, __LINE__); - a1.Start(__func__, __FILE__, __LINE__); + // Reunitarization setup int *num_failures_h = (int *)mapped_malloc(sizeof(int)); int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); - - if (link_recon != QUDA_RECONSTRUCT_8 && coldstart) - InitGaugeField(*U); - else - InitGaugeField(*U, *randstates); - - // Reunitarization setup SetReunitarizationConsts(); - plaquette(*U); - - for (int step = 1; step <= nsteps; ++step) { - printfQuda("Step %d\n", step); - Monte(*U, *randstates, beta_value, nhbsteps, novrsteps); - // Reunitarize gauge links... 
- *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization\n"); + a0.start(); + + // If no field is loaded, create a physical quenched field on the device + if (!gauge_load) { + GaugeFieldParam gParam(param); + gParam.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED; + gParam.create = QUDA_NULL_FIELD_CREATE; + gParam.reconstruct = link_recon; + gParam.setPrecision(prec, true); + for (int d = 0; d < 4; d++) { + if (comm_dim_partitioned(d)) gParam.r[d] = 2; + gParam.x[d] += 2 * gParam.r[d]; + } + + U = new cudaGaugeField(gParam); + + RNG randstates(*U, 1234); + + nsteps = heatbath_num_steps; + nhbsteps = heatbath_num_heatbath_per_step; + novrsteps = heatbath_num_overrelax_per_step; + coldstart = heatbath_coldstart; + beta_value = heatbath_beta_value; + a1.start(); + + if (coldstart) + InitGaugeField(*U); + else + InitGaugeField(*U, randstates); + + for (int step = 1; step <= nsteps; ++step) { + printfQuda("Step %d\n", step); + Monte(*U, randstates, beta_value, nhbsteps, novrsteps); + + // Reunitarization + *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h); + plaq = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + } + + a1.stop(); + printfQuda("Time Monte -> %.6f s\n", a1.last()); + } else { - plaquette(*U); + // If a field is loaded, create a device field and copy + printfQuda("Copying gauge field from host\n"); + param.location = QUDA_CPU_FIELD_LOCATION; + GaugeFieldParam gauge_field_param(param, host_gauge); + gauge_field_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + GaugeField *host = GaugeField::Create(gauge_field_param); + + // switch the parameters for creating the mirror precise cuda gauge field + gauge_field_param.create = QUDA_NULL_FIELD_CREATE; + gauge_field_param.reconstruct = param.reconstruct; + 
gauge_field_param.setPrecision(param.cuda_prec, true); + + if (comm_partitioned()) { + int R[4] = {0, 0, 0, 0}; + for (int d = 0; d < 4; d++) if (comm_dim_partitioned(d)) R[d] = 2; + static TimeProfile GaugeFix("GaugeFix"); + cudaGaugeField *tmp = new cudaGaugeField(gauge_field_param); + tmp->copy(*host); + U = createExtendedGauge(*tmp, R, GaugeFix); + delete tmp; + } else { + U = new cudaGaugeField(gauge_field_param); + U->copy(*host); + } + + delete host; + + // Reunitarization + *num_failures_h = 0; + unitarizeLinks(*U, num_failures_d); + qudaDeviceSynchronize(); + if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h); + + plaq = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); } - a1.Stop(__func__, __FILE__, __LINE__); - - printfQuda("Time Monte -> %.6f s\n", a1.Last()); - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - host_free(num_failures_h); // If a specific test type is requested, perfrom it now and then // turn off all Google tests in the tear down. switch (test_type) { case 0: - // Do the Google testing - break; + // Do the Google testing + break; case 1: run_ovr(); break; case 2: run_fft(); break; - default: errorQuda("Invalid test type %d ", test_type); + default: errorQuda("Invalid test type %d", test_type); } + + host_free(num_failures_h); } } @@ -231,10 +233,8 @@ class GaugeAlgTest : public ::testing::Test // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); - a0.Stop(__func__, __FILE__, __LINE__); - printfQuda("Time -> %.6f s\n", a0.Last()); - randstates->Release(); - delete randstates; + a0.stop(); + printfQuda("Time -> %.6f s\n", a0.last()); } // If we performed a specific instance, switch off the // Google testing. 
@@ -247,25 +247,29 @@ class GaugeAlgTest : public ::testing::Test gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + saveTuneCache(); // Save if output string is specified - if (strcmp(gauge_outfile, "")) save_gauge(); + if (gauge_store) save_gauge(); } } virtual void run_fft() { if (execute) { - if (!checkDimsPartitioned()) { + if (!comm_partitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + saveTuneCache(); // Save if output string is specified - if (strcmp(gauge_outfile, "")) save_gauge(); + if (gauge_store) save_gauge(); } else { errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); } @@ -274,25 +278,22 @@ class GaugeAlgTest : public ::testing::Test virtual void save_gauge() { - printfQuda("Saving the gauge field to file %s\n", gauge_outfile); QudaGaugeParam gauge_param = newQudaGaugeParam(); setWilsonGaugeParam(gauge_param); void *cpu_gauge[4]; - for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } + for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); } - GaugeFieldParam 
gParam(0, param); - gParam.pad = 0; + GaugeFieldParam gParam(param); gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; gParam.create = QUDA_NULL_FIELD_CREATE; gParam.link_type = param.type; gParam.reconstruct = param.reconstruct; gParam.setPrecision(gParam.Precision(), true); - cudaGaugeField *gauge; - gauge = new cudaGaugeField(gParam); + cudaGaugeField *gauge = new cudaGaugeField(gParam); // copy into regular field copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); @@ -301,14 +302,15 @@ class GaugeAlgTest : public ::testing::Test // Write to disk write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0); - for (int dir = 0; dir < 4; dir++) free(cpu_gauge[dir]); + for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]); delete gauge; } }; + TEST_F(GaugeAlgTest, Generation) { - if (execute) { + if (execute && !gauge_load) { detu = getLinkDeterminant(*U); ASSERT_TRUE(CheckDeterminant(detu)); } @@ -321,54 +323,47 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); + saveTuneCache(); } } -TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) +bool checkDimsPartitioned() { - if (execute) { - printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - } + if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || 
comm_dim_partitioned(2) || comm_dim_partitioned(3)) + return true; + return false; } -TEST_F(GaugeAlgTest, Landau_FFT) +bool comparePlaquette(double3 a, double3 b) { - if (execute) { - if (!checkDimsPartitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - } - } + printfQuda("Plaq: %.16e, %.16e, %.16e\n", a.x, a.y, a.z); + printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); + double a0, a1, a2; + a0 = std::abs(a.x - b.x); + a1 = std::abs(a.y - b.y); + a2 = std::abs(a.z - b.z); + double prec_val = 1.0e-5; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } -TEST_F(GaugeAlgTest, Coulomb_FFT) +bool checkDeterminant(double2 detu) { - if (execute) { - if (!checkDimsPartitioned()) { - printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - } - } + printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y); + double prec_val = 5e-8; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; + return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val; } int main(int argc, char **argv) { - // command line options + // initalize google test, includes command line options + ::testing::InitGoogleTest(&argc, argv); + + // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); @@ -376,29 +371,44 @@ int main(int argc, char 
**argv) test_type = 0; CLI::TransformPairs test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}}; app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); + try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { return app->exit(e); } + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) + initComms(argc, argv, gridsize_from_cmdline); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) - initComms(argc, argv, gridsize_from_cmdline); + setWilsonGaugeParam(gauge_param); + setDims(gauge_param.X); + + display_test_info(); + + gauge_load = strcmp(latfile, ""); + gauge_store = strcmp(gauge_outfile, ""); + + // If we are passing a gauge field to the test, we must allocate host memory. + // If no gauge is passed, we generate a quenched field on the device. 
+ if (gauge_load) { + printfQuda("Loading gauge field from host\n"); + for (int dir = 0; dir < 4; dir++) { + host_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + } + constructHostGaugeField(host_gauge, gauge_param, argc, argv); + } // call srand() with a rank-dependent seed initRand(); - display_test_info(); - // initialize the QUDA library initQuda(device_ordinal); - // initalize google test, includes command line options - ::testing::InitGoogleTest(&argc, argv); - // Ensure gtest prints only from rank 0 ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } @@ -406,8 +416,14 @@ int main(int argc, char **argv) // return code for google test int test_rc = RUN_ALL_TESTS(); + if (gauge_load) { + // release memory + for (int dir = 0; dir < 4; dir++) host_free(host_gauge[dir]); + } + endQuda(); - finalizeComms(); - return test_rc; + finalizeComms(); + + return test_rc; } diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp deleted file mode 100644 index 5728dd1141..0000000000 --- a/tests/gauge_alg_test.cpp +++ /dev/null @@ -1,429 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include - -#include - -using namespace quda; - -//***********************************************************// -// This boolean controls whether or not the full Google test // -// is done. If the user passes a value of 1 or 2 to --test // -// then a single instance of OVR or FFT gauge fixing is done // -// and the value of this bool is set to false. Otherwise the // -// Google tests are performed. 
// -//***********************************************************// -bool execute = true; - -bool gauge_load; -bool gauge_store; - -void *host_gauge[4]; - -void display_test_info() -{ - printfQuda("running the following test:\n"); - - switch (test_type) { - case 0: printfQuda("\n Google testing\n"); break; - case 1: printfQuda("\nOVR gauge fix\n"); break; - case 2: printfQuda("\nFFT gauge fix\n"); break; - default: errorQuda("Undefined test type %d given", test_type); - } - - printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); - printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), - get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, - tdim, Lsdim); - - printfQuda("Grid partition info: X Y Z T\n"); - printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), - dimPartitioned(3)); -} - -class GaugeAlgTest : public ::testing::Test -{ -protected: - QudaGaugeParam param; - - Timer a0, a1; - double2 detu; - double3 plaq; - GaugeField *U; - int nsteps; - int nhbsteps; - int novrsteps; - bool coldstart; - double beta_value; - - bool unit_test; - - void SetReunitarizationConsts() - { - const double unitarize_eps = 1e-14; - const double max_error = 1e-10; - const int reunit_allow_svd = 1; - const int reunit_svd_only = 0; - const double svd_rel_error = 1e-6; - const double svd_abs_error = 1e-6; - setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, svd_abs_error); - } - - bool comparePlaquette(double3 a, double3 b) - { - double a0, a1, a2; - a0 = std::abs(a.x - b.x); - a1 = std::abs(a.y - b.y); - a2 = std::abs(a.z - b.z); - double prec_val = 1.0e-5; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance*1e2; - return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); - } - - bool CheckDeterminant(double2 detu) - { - double prec_val = 5e-8; - if (prec == QUDA_DOUBLE_PRECISION) 
prec_val = gf_tolerance*1e2; - return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val); - } - - virtual void SetUp() - { - if (execute) { - setVerbosity(QUDA_VERBOSE); - param = newQudaGaugeParam(); - - // Setup gauge container. - setWilsonGaugeParam(param); - param.t_boundary = QUDA_PERIODIC_T; - - // Reunitarization setup - int *num_failures_h = (int *)mapped_malloc(sizeof(int)); - int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); - SetReunitarizationConsts(); - - a0.start(); - - // If no field is loaded, create a physical quenched field on the device - if (!gauge_load) { - GaugeFieldParam gParam(param); - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.reconstruct = link_recon; - gParam.setPrecision(prec, true); - for (int d = 0; d < 4; d++) { - if (comm_dim_partitioned(d)) gParam.r[d] = 2; - gParam.x[d] += 2 * gParam.r[d]; - } - - U = new cudaGaugeField(gParam); - - RNG randstates(*U, 1234); - - nsteps = heatbath_num_steps; - nhbsteps = heatbath_num_heatbath_per_step; - novrsteps = heatbath_num_overrelax_per_step; - coldstart = heatbath_coldstart; - beta_value = heatbath_beta_value; - a1.start(); - - if (coldstart) - InitGaugeField(*U); - else - InitGaugeField(*U, randstates); - - for (int step = 1; step <= nsteps; ++step) { - printfQuda("Step %d\n", step); - Monte(*U, randstates, beta_value, nhbsteps, novrsteps); - - // Reunitarization - *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h); - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - } - - a1.stop(); - printfQuda("Time Monte -> %.6f s\n", a1.last()); - } else { - - // If a field is loaded, create a device field and copy - printfQuda("Copying gauge field from host\n"); - param.location = QUDA_CPU_FIELD_LOCATION; - GaugeFieldParam 
gauge_field_param(param, host_gauge); - gauge_field_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - GaugeField *host = GaugeField::Create(gauge_field_param); - - // switch the parameters for creating the mirror precise cuda gauge field - gauge_field_param.create = QUDA_NULL_FIELD_CREATE; - gauge_field_param.reconstruct = param.reconstruct; - gauge_field_param.setPrecision(param.cuda_prec, true); - - if (comm_partitioned()) { - int R[4] = {0, 0, 0, 0}; - for (int d = 0; d < 4; d++) if (comm_dim_partitioned(d)) R[d] = 2; - static TimeProfile GaugeFix("GaugeFix"); - cudaGaugeField *tmp = new cudaGaugeField(gauge_field_param); - tmp->copy(*host); - U = createExtendedGauge(*tmp, R, GaugeFix); - delete tmp; - } else { - U = new cudaGaugeField(gauge_field_param); - U->copy(*host); - } - - delete host; - - // Reunitarization - *num_failures_h = 0; - unitarizeLinks(*U, num_failures_d); - qudaDeviceSynchronize(); - if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h); - - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - } - - - // If a specific test type is requested, perfrom it now and then - // turn off all Google tests in the tear down. - switch (test_type) { - case 0: - // Do the Google testing - break; - case 1: run_ovr(); break; - case 2: run_fft(); break; - default: errorQuda("Invalid test type %d", test_type); - } - - host_free(num_failures_h); - } - } - - virtual void TearDown() - { - if (execute) { - detu = getLinkDeterminant(*U); - double2 tru = getLinkTrace(*U); - printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); - printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); - - delete U; - // Release all temporary memory used for data exchange between GPUs in multi-GPU mode - PGaugeExchangeFree(); - - a0.stop(); - printfQuda("Time -> %.6f s\n", a0.last()); - } - // If we performed a specific instance, switch off the - // Google testing. 
- if (test_type != 0) execute = false; - } - - virtual void run_ovr() - { - if (execute) { - gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, - gf_reunit_interval, gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); - // Save if output string is specified - if (gauge_store) save_gauge(); - } - } - virtual void run_fft() - { - if (execute) { - if (!comm_partitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); - // Save if output string is specified - if (gauge_store) save_gauge(); - } else { - errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); - } - } - } - - virtual void save_gauge() - { - printfQuda("Saving the gauge field to file %s\n", gauge_outfile); - - QudaGaugeParam gauge_param = newQudaGaugeParam(); - setWilsonGaugeParam(gauge_param); - - void *cpu_gauge[4]; - for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); } - - GaugeFieldParam gParam(param); - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; - gParam.reconstruct = param.reconstruct; - gParam.setPrecision(gParam.Precision(), true); - - cudaGaugeField *gauge = new cudaGaugeField(gParam); - - // copy into regular field - copyExtendedGauge(*gauge, *U, 
QUDA_CUDA_FIELD_LOCATION); - saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); - - // Write to disk - write_gauge_field(gauge_outfile, cpu_gauge, gauge_param.cpu_prec, gauge_param.X, 0, (char **)0); - - for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]); - delete gauge; - } -}; - - -TEST_F(GaugeAlgTest, Generation) -{ - if (execute && !gauge_load) { - detu = getLinkDeterminant(*U); - ASSERT_TRUE(CheckDeterminant(detu)); - } -} - -TEST_F(GaugeAlgTest, Landau_Overrelaxation) -{ - if (execute) { - printfQuda("Landau gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); - } -} - -bool checkDimsPartitioned() -{ - if (comm_dim_partitioned(0) || comm_dim_partitioned(1) || comm_dim_partitioned(2) || comm_dim_partitioned(3)) - return true; - return false; -} - -bool comparePlaquette(double3 a, double3 b) -{ - printfQuda("Plaq: %.16e, %.16e, %.16e\n", a.x, a.y, a.z); - printfQuda("Plaq_gf: %.16e, %.16e, %.16e\n", b.x, b.y, b.z); - double a0, a1, a2; - a0 = std::abs(a.x - b.x); - a1 = std::abs(a.y - b.y); - a2 = std::abs(a.z - b.z); - double prec_val = 1.0e-5; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); -} - -bool checkDeterminant(double2 detu) -{ - printfQuda("Det: %.16e: %.16e\n", detu.x, detu.y); - double prec_val = 5e-8; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1.0e-15; - return std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val; -} - -int main(int argc, char **argv) -{ - // initalize google test, includes command line options - ::testing::InitGoogleTest(&argc, argv); - - // 
command line options - auto app = make_app(); - add_gaugefix_option_group(app); - add_heatbath_option_group(app); - - test_type = 0; - CLI::TransformPairs test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}}; - app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); - - try { - app->parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app->exit(e); - } - - // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) - initComms(argc, argv, gridsize_from_cmdline); - - QudaGaugeParam gauge_param = newQudaGaugeParam(); - if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; - if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - - setWilsonGaugeParam(gauge_param); - setDims(gauge_param.X); - - display_test_info(); - - gauge_load = strcmp(latfile, ""); - gauge_store = strcmp(gauge_outfile, ""); - - // If we are passing a gauge field to the test, we must allocate host memory. - // If no gauge is passed, we generate a quenched field on the device. 
- if (gauge_load) { - printfQuda("Loading gauge field from host\n"); - for (int dir = 0; dir < 4; dir++) { - host_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); - } - constructHostGaugeField(host_gauge, gauge_param, argc, argv); - } - - // call srand() with a rank-dependent seed - initRand(); - - // initialize the QUDA library - initQuda(device_ordinal); - - // Ensure gtest prints only from rank 0 - ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); - if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } - - // return code for google test - int test_rc = RUN_ALL_TESTS(); - - if (gauge_load) { - // release memory - for (int dir = 0; dir < 4; dir++) host_free(host_gauge[dir]); - } - - endQuda(); - - finalizeComms() - - return test_rc; -} From dfb26685cdf9bf2cb13b473a565357a569e3975b Mon Sep 17 00:00:00 2001 From: cpviolator Date: Thu, 5 Aug 2021 14:53:41 -0700 Subject: [PATCH 14/32] Add gauge param argument --- lib/interface_quda.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 73cdb76b45..5e6848bd79 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5545,7 +5545,7 @@ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const u checkGaugeParam(param); profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT); - GaugeFieldParam gParam(*param); + GaugeFieldParam gParam(*param, gauge); auto *cpuGauge = new cpuGaugeField(gParam); // gParam.pad = getFatLinkPadding(param->X); From bb9c0de72f285c002fb140f9a2bae100d80f3cd6 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Mon, 11 Oct 2021 11:35:12 -0700 Subject: [PATCH 15/32] Make gf_fft_autotune default to true --- tests/gauge_alg_ctest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index 9902daab8f..fb810385ba 100644 --- a/tests/gauge_alg_ctest.cpp 
+++ b/tests/gauge_alg_ctest.cpp @@ -69,7 +69,7 @@ double gf_fft_alpha = 0.8; int gf_reunit_interval = 10; double gf_tolerance = 1e-6; bool gf_theta_condition = false; -bool gf_fft_autotune = false; +bool gf_fft_autotune = true; void add_gaugefix_option_group(std::shared_ptr quda_app) { From 82658431f35097db72e5f86ea6733d2db1a853b0 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Tue, 12 Oct 2021 14:20:15 -0700 Subject: [PATCH 16/32] Remove alpha autotuning in FFT gauge fixing, add comments where appropriate. Fix gauge dir error in gauge_alg_test, adjust step output in gauge fixing --- include/quda.h | 2 +- lib/gauge_fix_fft.cu | 24 ++++++++++++++++++------ lib/gauge_fix_ovr.cu | 6 +++--- lib/interface_quda.cpp | 10 ++++++++-- tests/gauge_alg_ctest.cpp | 37 ++++++++++++++++++++++++++----------- 5 files changed, 56 insertions(+), 23 deletions(-) diff --git a/include/quda.h b/include/quda.h index bf9bbe64c3..4b0e8c5917 100644 --- a/include/quda.h +++ b/include/quda.h @@ -1525,7 +1525,7 @@ extern "C" { * @param[in] Nsteps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value + * @param[in] autotune (legacy), 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value. We hardocde this to 1 to ensure optimal behaviour. 
Instructions on how the user may regain control of this parameter are located in comments in lib/gauge_fix_fft.cu * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when * iteration reachs the maximum number of steps defined by Nsteps * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu index 54c7ab8845..c92f80a242 100644 --- a/lib/gauge_fix_fft.cu +++ b/lib/gauge_fix_fft.cu @@ -185,12 +185,24 @@ namespace quda { void gaugeFixingFFT(GaugeField& data, int Nsteps, int verbose_interval, double alpha0, int autotune, double tolerance, int stopWtheta) { + // We hardcode the value of autotune to 1 at this point as it is the last + // point before computation begins. This ensures that the the user cannot + // override alpha autotuning. This is done because it is very easy for the + // FFT gauge fixing to fail with a poorly chosen value of alpha, but + // autotuning ensures optimal behaviour. + // Users who wish to change this behaviour may remove the follwing line + // of code and recompile to regain control of alpha autotuning. + autotune = 1; + TimeProfile profileInternalGaugeFixFFT("InternalGaugeFixQudaFFT", false); profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); if (getVerbosity() >= QUDA_SUMMARIZE) { - printfQuda("\tAuto tune active: %s\n", autotune ? "true" : "false"); + if(autotune == 1) printfQuda("\tAuto tune active: alpha will be adjusted as the algorithm progresses\n"); + else if(autotune == 0) printfQuda("\tAuto tune not active: alpha will remain constant as the algorithm progresses\n"); + else errorQuda("Unknown value of autotune = %d", autotune); + printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha0); printfQuda("\tTolerance: %e\n", tolerance); printfQuda("\tStop criterion method: %s\n", stopWtheta ? 
"Theta" : "Delta"); @@ -217,7 +229,7 @@ namespace quda { GaugeFixQuality gfixquality(argQ, data); gfixquality.apply(device::get_default_stream()); double action0 = argQ.getAction(); - if(getVerbosity() >= QUDA_SUMMARIZE) printf("Step: %d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta()); + if(getVerbosity() >= QUDA_SUMMARIZE) printf("Step: %05d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta()); double diff = 0.0; int iter = 0; @@ -285,11 +297,11 @@ namespace quda { double action = argQ.getAction(); diff = abs(action0 - action); if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_SUMMARIZE) - printf("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); + printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); if ( autotune && ((action - action0) < -1e-14) ) { if ( arg.alpha > 0.01 ) { arg.alpha = 0.95 * arg.alpha; - if(getVerbosity() >= QUDA_SUMMARIZE) printf(">>>>>>>>>>>>>> Warning: changing alpha down -> %.4e\n", arg.alpha); + if(getVerbosity() >= QUDA_SUMMARIZE) printf("Changing alpha down -> %.4e\n", arg.alpha); } } //------------------------------------------------------------------------ @@ -301,7 +313,7 @@ namespace quda { action0 = action; } if ((iter % verbose_interval) != 0 && getVerbosity() >= QUDA_SUMMARIZE) - printf("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter, argQ.getAction(), argQ.getTheta(), diff); + printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter, argQ.getAction(), argQ.getTheta(), diff); // Reunitarize at end const double unitarize_eps = 1e-14; @@ -382,7 +394,7 @@ namespace quda { * @param[in] Nsteps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] alpha, gauge fixing parameter of the method, most common value is 
0.08 - * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value + * @param[in] autotune (legacy), 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value. We hardcode this to true. * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value */ diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu index b97772f43d..93a3521530 100644 --- a/lib/gauge_fix_ovr.cu +++ b/lib/gauge_fix_ovr.cu @@ -313,7 +313,7 @@ namespace quda { flop += (double)GaugeFixQuality.flops(); byte += (double)GaugeFixQuality.bytes(); double action0 = argQ.getAction(); - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta()); + if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\n", 0, argQ.getAction(), argQ.getTheta()); *num_failures_h = 0; unitarizeLinks(data, data, num_failures_d); @@ -413,7 +413,7 @@ namespace quda { double action = argQ.getAction(); double diff = abs(action0 - action); if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_VERBOSE) - printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); + printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); if (stopWtheta) { if (argQ.getTheta() < tolerance) break; } else { @@ -436,7 +436,7 @@ namespace quda { byte += (double)GaugeFixQuality.bytes(); double action = argQ.getAction(); double diff = abs(action0 - action); - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); + if (getVerbosity() >= 
QUDA_VERBOSE) printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); } for (int i = 0; i < 2 && nlinksfaces; i++) managed_free(borderpoints[i]); diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 0c5ff3b8df..a99373452c 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5652,10 +5652,16 @@ int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const // perform the update profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); + // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu) + // This ensures that the user can not override alpha autotuning. This is done because + // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of + // alpha, but autotuning alpha ensures optimal behaviour. + // Users who wish to change this behaviour may read the comment in + // lib/gauge_fix_fft.cu to regain control. gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); - + profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE); - + // copy the gauge field back to the host profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index fb810385ba..4ea6a7fbc4 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -69,7 +69,6 @@ double gf_fft_alpha = 0.8; int gf_reunit_interval = 10; double gf_tolerance = 1e-6; bool gf_theta_condition = false; -bool gf_fft_autotune = true; void add_gaugefix_option_group(std::shared_ptr quda_app) { @@ -90,9 +89,6 @@ void add_gaugefix_option_group(std::shared_ptr quda_app) opgroup->add_option( "--gf-theta-condition", gf_theta_condition, "Use the theta value to determine the gauge fixing if true. 
If false, use the delta value (default false)"); - opgroup->add_option( - "--gf-fft-autotune", gf_fft_autotune, - "In the FFT method, automatically adjust the alpha parameter if the quality begins to diverge (default false)"); } class GaugeAlgTest : public ::testing::Test { @@ -285,6 +281,7 @@ class GaugeAlgTest : public ::testing::Test { virtual void run_ovr() { if (execute) { + printfQuda("%s gauge fixing with overrelaxation method\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); auto plaq_gf = plaquette(*U); @@ -300,8 +297,14 @@ class GaugeAlgTest : public ::testing::Test { { if (execute) { if (!checkDimsPartitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + printfQuda("%s gauge fixing with steepest descent method with FFT\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); + // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu) + // This ensures that the user can not override alpha autotuning. This is done because + // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of + // alpha, but autotuning alpha ensures optimal behaviour. + // Users who wish to change this behaviour may read the comment in + // lib/gauge_fix_fft.cu to regain control. 
+ gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance, gf_theta_condition); auto plaq_gf = plaquette(*U); @@ -389,8 +392,14 @@ TEST_F(GaugeAlgTest, Landau_FFT) { if (execute) { if (!comm_partitioned()) { - printfQuda("Landau gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + printfQuda("Landau gauge fixing with steepest descent method with FFT\n"); + // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu) + // This ensures that the user can not override alpha autotuning. This is done because + // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of + // alpha, but autotuning alpha ensures optimal behaviour. + // Users who wish to change this behaviour may read the comment in + // lib/gauge_fix_fft.cu to regain control. + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance, gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); @@ -405,10 +414,16 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) { if (execute) { if (!comm_partitioned()) { - printfQuda("Coulomb gauge fixing with steepest descent method with FFTs\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n"); + // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu) + // This ensures that the user can not override alpha autotuning. This is done because + // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of + // alpha, but autotuning alpha ensures optimal behaviour. + // Users who wish to change this behaviour may read the comment in + // lib/gauge_fix_fft.cu to regain control. 
+ gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance, gf_theta_condition); - auto plaq_gf = plaquette(*U); +auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); From 040be93004b847b2092280c1b79016904943a511 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 29 Oct 2021 12:21:46 -0700 Subject: [PATCH 17/32] Revert MPI test params --- tests/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 764cc6af97..4beabb3d81 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -812,8 +812,8 @@ foreach(prec IN LISTS TEST_PRECS) if(QUDA_GAUGE_ALG) add_test(NAME gauge_alg_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dim 2 4 6 8 --prec ${prec} + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dim 4 6 8 10 --prec ${prec} --gtest_output=xml:gauge_arg_test_${prec}.xml) endif() From bb91ecd531bc6c0f433707cf56f1ea2e8932433a Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 29 Oct 2021 12:54:52 -0700 Subject: [PATCH 18/32] Use device_timer instead of Timer --- tests/gauge_alg_ctest.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index 4ea6a7fbc4..4afa0598ce 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -95,8 +95,8 @@ class GaugeAlgTest : public ::testing::Test { protected: QudaGaugeParam param; - - Timer a0,a1; + + device_timer_t device_timer_1, device_timer_2; double2 detu; double3 plaq; cudaGaugeField *U; @@ -158,7 +158,7 @@ class GaugeAlgTest : public ::testing::Test { int *num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); SetReunitarizationConsts(); - a0.start(); + device_timer_1.start(); // If no field is loaded, 
create a physical quenched field on the device if (!gauge_load) { @@ -181,7 +181,7 @@ class GaugeAlgTest : public ::testing::Test { novrsteps = heatbath_num_overrelax_per_step; coldstart = heatbath_coldstart; beta_value = heatbath_beta_value; - a1.start(); + device_timer_2.start(); if (coldstart) InitGaugeField(*U); @@ -201,8 +201,8 @@ class GaugeAlgTest : public ::testing::Test { printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); } - a1.stop(); - printfQuda("Time Monte -> %.6f s\n", a1.last()); + device_timer_2.stop(); + printfQuda("Time Monte -> %.6f s\n", device_timer_2.last()); } else { // If a field is loaded, create a device field and copy @@ -270,8 +270,8 @@ class GaugeAlgTest : public ::testing::Test { // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); - a0.stop(__func__, __FILE__, __LINE__); - printfQuda("Time -> %.6f s\n", a0.last()); + device_timer_1.stop(); + printfQuda("Time -> %.6f s\n", device_timer_1.last()); } // If we performed a specific instance, switch off the // Google testing. 
@@ -328,7 +328,7 @@ class GaugeAlgTest : public ::testing::Test { setWilsonGaugeParam(gauge_param); void *cpu_gauge[4]; - for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = malloc(V * gauge_site_size * gauge_param.cpu_prec); } + for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); } GaugeFieldParam gParam(param); gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; From f2558fea690c8ce414ea8a1ec9d0d8ee6b03ad51 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 29 Oct 2021 16:41:19 -0700 Subject: [PATCH 19/32] Remove extra semi-colon --- tests/su3_test.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp index a5b1b83afb..9f166b6ca6 100644 --- a/tests/su3_test.cpp +++ b/tests/su3_test.cpp @@ -94,8 +94,7 @@ void add_su3_option_group(std::shared_ptr quda_app) opgroup->add_option("--su3-wflow-type", wflow_type, "The type of action to use in the wilson flow (default wilson)") ->transform(CLI::QUDACheckedTransformer(wflow_type_map)); - ; - + opgroup->add_option("--su3-measurement-interval", measurement_interval, "Measure the field energy and topological charge every Nth step (default 5) "); } From 4f98cc54dd1f853f4473686af03e5520bba7d5d0 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 29 Oct 2021 16:53:46 -0700 Subject: [PATCH 20/32] Consolidate the gauge fixing interface, use new parameter struct QudaGaugeFixParam to define both parameters and algorithm, format variable names to be consistent, use QudaBoolean for booleans rather than integers --- include/enum_quda.h | 6 ++ include/enum_quda_fortran.h | 11 +++ include/gauge_tools.h | 22 ++--- include/quda.h | 73 ++++++++------- lib/check_params.h | 49 +++++++++- lib/gauge_fix_fft.cu | 58 +++++------- lib/gauge_fix_ovr.cu | 34 +++---- lib/interface_quda.cpp | 178 ++++++++++++------------------------ lib/milc_interface.cpp | 33 +++++-- tests/gauge_alg_ctest.cpp | 46 ++++------ 10 files changed, 255 
insertions(+), 255 deletions(-) diff --git a/include/enum_quda.h b/include/enum_quda.h index 15354a1de7..fee22c5169 100644 --- a/include/enum_quda.h +++ b/include/enum_quda.h @@ -550,6 +550,12 @@ typedef enum QudaWFlowType_s { QUDA_WFLOW_TYPE_INVALID = QUDA_INVALID_ENUM } QudaWFlowType; +typedef enum QudaGaugeFixType_s { + QUDA_GAUGEFIX_TYPE_OVR = 0, + QUDA_GAUGEFIX_TYPE_FFT = 1, + QUDA_GAUGEFIX_TYPE_INVALID = QUDA_INVALID_ENUM +} QudaGaugeFixType; + // Allows to choose an appropriate external library typedef enum QudaExtLibType_s { QUDA_CUSOLVE_EXTLIB, diff --git a/include/enum_quda_fortran.h b/include/enum_quda_fortran.h index e810b1631a..7614e2bbdc 100644 --- a/include/enum_quda_fortran.h +++ b/include/enum_quda_fortran.h @@ -493,6 +493,17 @@ #define QUDA_CONTRACT_GAMMA_S34 15 #define QUDA_CONTRACT_GAMMA_INVALID QUDA_INVALID_ENUM +#define QudaWFlowType integer(4) +#define QUDA_WFLOW_TYPE_WILSON 0 +#define QUDA_WFLOW_TYPE_SYMANZIK 1 +#define QUDA_WFLOW_TYPE_INVALID QUDA_INVALID_ENUM + +#define QudaGaugeFixType integer(4) +#define QUDA_GAUGEFIX_TYPE_OVR 0 +#define QUDA_GAUGEFIX_TYPE_FFT 1 +#define QUDA_GAUGEFIX_TYPE_INVALID QUDA_INVALID_ENUM + + #define QudaExtLibType integer(4) #define QUDA_CUSOLVE_EXTLIB 0 #define QUDA_EIGEN_EXTLIB 1 diff --git a/include/gauge_tools.h b/include/gauge_tools.h index e32f38f5b1..25b1691195 100644 --- a/include/gauge_tools.h +++ b/include/gauge_tools.h @@ -114,33 +114,33 @@ namespace quda * @brief Gauge fixing with overrelaxation with support for single and multi GPU. * @param[in,out] data, quda gauge field * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing + * @param[in] steps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
* @param[in] tolerance, torelance value to stop the method, if this * value is zero then the method stops when iteration reachs the - * maximum number of steps defined by Nsteps + * maximum number of steps defined by steps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value + * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value */ - void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, - const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta); + void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval, + const double relax_boost, const double tolerance, const int reunit_interval, const QudaBoolean theta_condition); /** * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. 
* @param[in,out] data, quda gauge field * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing + * @param[in] steps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune, 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value + * @param[in] autotune, QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we decrease the alpha value * @param[in] tolerance, torelance value to stop the method, if this * value is zero then the method stops when iteration reachs the - * maximum number of steps defined by Nsteps - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value + * maximum number of steps defined by steps + * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value */ - void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, - const double alpha, const int autotune, const double tolerance, const int stopWtheta); + void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval, + const double alpha, const QudaBoolean autotune, const double tolerance, const QudaBoolean theta_condition); /** @brief Compute the Fmunu tensor diff --git a/include/quda.h b/include/quda.h index 419bd1febe..3401cc4f67 100644 --- a/include/quda.h +++ b/include/quda.h @@ -779,6 +779,22 @@ extern "C" { QudaBLASDataOrder data_order; /**< Specifies if using Row or Column major */ } QudaBLASParam; + typedef struct QudaGaugeFixParam_s { + size_t struct_size; /**< Size of this struct in bytes. 
Used to ensure that the host application and QUDA see the same struct size */ + + QudaGaugeFixType fix_type; /**< The aglorithm to use for gauge fixing */ + int gauge_dir; /**< The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4) */ + int maxiter; /**< The maximun number of gauge fixing iterations to be applied (default 10000) */ + int verbosity_interval; /**< Print the gauge fixing progress every N steps (default 100) */ + double ovr_relaxation_boost; /**< The overrelaxation boost parameter for the overrelaxation method (default 1.5) */ + double fft_alpha; /**< The Alpha parameter in the FFT method (default 0.8) */ + QudaBoolean fft_autotune; /**< Autotune the Alpha parameter in the FFT method (default true) */ + int reunit_interval; /**< Reunitarise the gauge field every N steps (default 10) */ + double tolerance; /**< The tolerance of the gauge fixing quality (default 1e-6) */ + QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)" */ + } QudaGaugeFixParam; + + /* * Interface functions, found in interface_quda.cpp */ @@ -955,6 +971,15 @@ extern "C" { */ QudaBLASParam newQudaBLASParam(void); + /** + * A new QudaGaugeFixParam should always be initialized immediately + * after it's defined (and prior to explicitly setting its members) + * using this function. Typical usage is as follows: + * + * QudaGaugeFixParam fix_param = newQudaGaugeFixParam(); + */ + QudaGaugeFixParam newQudaGaugeFixParam(void); + /** * Print the members of QudaGaugeParam. * @param param The QudaGaugeParam whose elements we are to print. @@ -991,6 +1016,12 @@ extern "C" { */ void printQudaBLASParam(QudaBLASParam *param); + /** + * Print the members of QudaGaugeFixParam. + * @param param The QudaGaugeFixParam whose elements we are to print. + */ + void printQudaGaugeFixParam(QudaGaugeFixParam *param); + /** * Load the gauge field from the host. 
* @param h_gauge Base pointer to host gauge field (regardless of dimensionality) @@ -1505,42 +1536,14 @@ extern "C" { const int *X); /** - * @brief Gauge fixing with overrelaxation with support for single and multi GPU. + * @brief Gauge fixing with overrelaxation with support for single and multi GPU, and steepest descent FFT with support for single GPU only. * @param[in,out] gauge, gauge field to be fixed - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when - * iteration reachs the maximum number of steps defined by Nsteps - * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value - * @param[in] param The parameters of the external fields and the computation settings - * @param[out] timeinfo - */ - int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, - const unsigned int verbose_interval, const double relax_boost, const double tolerance, - const unsigned int reunit_interval, const unsigned int stopWtheta, - QudaGaugeParam *param, double *timeinfo); - /** - * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. 
- * @param[in,out] gauge, gauge field to be fixed - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune (legacy), 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value. We hardocde this to 1 to ensure optimal behaviour. Instructions on how the user may regain control of this parameter are located in comments in lib/gauge_fix_fft.cu - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when - * iteration reachs the maximum number of steps defined by Nsteps - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value - * @param[in] param The parameters of the external fields and the computation settings - * @param[out] timeinfo - */ - int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, - const unsigned int verbose_interval, const double alpha, const unsigned int autotune, - const double tolerance, const unsigned int stopWtheta, QudaGaugeParam *param, - double *timeinfo); - + * @param[in] gauge_param The parameters of the external fields and the computation settings + * @param[in] fix_param Container for the gauge fixing algorithm and parameters to use. 
+ * @param[out] timeinfo Array to track timings + */ + int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *gauge_param, QudaGaugeFixParam *fix_param, double *timeinfo); + /** * @brief Strided Batched GEMM * @param[in] arrayA The array containing the A matrix data diff --git a/lib/check_params.h b/lib/check_params.h index f0784ada1b..28e8370fa6 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -1045,8 +1045,55 @@ void printQudaBLASParam(QudaBLASParam *param) #endif } -// clean up +#if defined INIT_PARAM +QudaGaugeFixParam newQudaGaugeFixParam(void) +{ + QudaGaugeFixParam ret; +#elif defined CHECK_PARAM +static void checkGaugeFixParam(QudaGaugeFixParam *param) +{ +#else +void printQudaGaugeFixParam(QudaGaugeFixParam *param) +{ + printfQuda("QUDA gauge fix parameters:\n"); +#endif +#if defined CHECK_PARAM + if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param)) + errorQuda("Unexpected QudaGaugeFixParam struct size %lu, expected %lu", param->struct_size, sizeof(*param)); +#else + P(struct_size, (size_t)INVALID_INT); +#endif + +#ifdef INIT_PARAM + P(gauge_dir, 4); + P(maxiter, 10000); + P(verbosity_interval, 100); + P(reunit_interval, 10); + P(ovr_relaxation_boost, 0.0); + P(fft_alpha, 0.0); + P(tolerance, 0.0); + P(fft_autotune, QUDA_BOOLEAN_FALSE); + P(theta_condition, QUDA_BOOLEAN_FALSE); +#else + P(gauge_dir, INVALID_INT); + P(maxiter, INVALID_INT); + P(verbosity_interval, INVALID_INT); + P(reunit_interval, INVALID_INT); + P(ovr_relaxation_boost, INVALID_DOUBLE); + P(fft_alpha, INVALID_DOUBLE); + P(tolerance, INVALID_DOUBLE); + P(fft_autotune, QUDA_BOOLEAN_FALSE); + P(theta_condition, QUDA_BOOLEAN_FALSE); +#endif + +#ifdef INIT_PARAM + return ret; +#endif +} + + +// clean up #undef INVALID_INT #undef INVALID_DOUBLE #undef P diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu index c92f80a242..74e0849d24 100644 --- a/lib/gauge_fix_fft.cu +++ b/lib/gauge_fix_fft.cu @@ -182,31 +182,20 @@ namespace quda { }; template - 
void gaugeFixingFFT(GaugeField& data, int Nsteps, int verbose_interval, - double alpha0, int autotune, double tolerance, int stopWtheta) + void gaugeFixingFFT(GaugeField& data, int steps, int verbose_interval, + double alpha0, QudaBoolean autotune, double tolerance, QudaBoolean theta_condition) { - // We hardcode the value of autotune to 1 at this point as it is the last - // point before computation begins. This ensures that the the user cannot - // override alpha autotuning. This is done because it is very easy for the - // FFT gauge fixing to fail with a poorly chosen value of alpha, but - // autotuning ensures optimal behaviour. - // Users who wish to change this behaviour may remove the follwing line - // of code and recompile to regain control of alpha autotuning. - autotune = 1; - TimeProfile profileInternalGaugeFixFFT("InternalGaugeFixQudaFFT", false); profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); if (getVerbosity() >= QUDA_SUMMARIZE) { - if(autotune == 1) printfQuda("\tAuto tune active: alpha will be adjusted as the algorithm progresses\n"); - else if(autotune == 0) printfQuda("\tAuto tune not active: alpha will remain constant as the algorithm progresses\n"); - else errorQuda("Unknown value of autotune = %d", autotune); - + if(autotune == QUDA_BOOLEAN_TRUE) printfQuda("\tAuto tune active: alpha will be adjusted as the algorithm progresses\n"); + else printfQuda("\tAuto tune not active: alpha will remain constant as the algorithm progresses\n"); printfQuda("\tAlpha parameter of the Steepest Descent Method: %e\n", alpha0); printfQuda("\tTolerance: %e\n", tolerance); - printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta"); - printfQuda("\tMaximum number of iterations: %d\n", Nsteps); + printfQuda("\tStop criterion method: %s\n", theta_condition == QUDA_BOOLEAN_TRUE ? 
"Theta" : "Delta"); + printfQuda("\tMaximum number of iterations: %d\n", steps); printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval); } @@ -233,7 +222,7 @@ namespace quda { double diff = 0.0; int iter = 0; - for (iter = 0; iter < Nsteps; iter++) { + for (iter = 0; iter < steps; iter++) { for (int k = 0; k < 6; k++) { //------------------------------------------------------------------------ // Set a pointer do the element k in lattice volume @@ -298,7 +287,7 @@ namespace quda { diff = abs(action0 - action); if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_SUMMARIZE) printf("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); - if ( autotune && ((action - action0) < -1e-14) ) { + if ( autotune == QUDA_BOOLEAN_TRUE && ((action - action0) < -1e-14) ) { if ( arg.alpha > 0.01 ) { arg.alpha = 0.95 * arg.alpha; if(getVerbosity() >= QUDA_SUMMARIZE) printf("Changing alpha down -> %.4e\n", arg.alpha); @@ -307,7 +296,7 @@ namespace quda { //------------------------------------------------------------------------ // Check gauge fix quality criterion //------------------------------------------------------------------------ - if ( stopWtheta ) { if ( argQ.getTheta() < tolerance ) break; } + if ( theta_condition == QUDA_BOOLEAN_TRUE ) { if ( argQ.getTheta() < tolerance ) break; } else { if ( diff < tolerance ) break; } action0 = action; @@ -368,21 +357,22 @@ namespace quda { gflops = (gflops * 1e-9) / (secs); gbytes = gbytes / (secs * 1e9); - if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Time: %6.6f s, Gflop/s = %6.1f, GB/s = %6.1f\n", secs, gflops, gbytes); - + if (getVerbosity() > QUDA_SUMMARIZE) + printfQuda("Time: %6.6f s, Gflop/s = %6.1f, GB/s = %6.1f\n", secs, gflops, gbytes); + host_free(num_failures_h); } template struct GaugeFixingFFT { - GaugeFixingFFT(GaugeField& data, int gauge_dir, int Nsteps, int verbose_interval, - double alpha, int autotune, 
double tolerance, int stopWtheta) + GaugeFixingFFT(GaugeField& data, int gauge_dir, int steps, int verbose_interval, + double alpha, QudaBoolean autotune, double tolerance, QudaBoolean theta_condition) { if (gauge_dir != 3) { if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs...\n"); - gaugeFixingFFT(data, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); + gaugeFixingFFT(data, steps, verbose_interval, alpha, autotune, tolerance, theta_condition); } else { if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs...\n"); - gaugeFixingFFT(data, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); + gaugeFixingFFT(data, steps, verbose_interval, alpha, autotune, tolerance, theta_condition); } } }; @@ -391,22 +381,22 @@ namespace quda { * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. * @param[in,out] data, quda gauge field * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing + * @param[in] steps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune (legacy), 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value. We hardcode this to true. - * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value + * @param[in] autotune QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we decrease the alpha value. 
+ * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by steps + * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value */ #if defined(GPU_GAUGE_ALG) - void gaugeFixingFFT(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha, - const int autotune, const double tolerance, const int stopWtheta) + void gaugeFixingFFT(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, const double alpha, + const QudaBoolean autotune, const double tolerance, const QudaBoolean theta_condition) { if (comm_partitioned()) errorQuda("Gauge Fixing with FFTs in multi-GPU support NOT implemented yet!"); - instantiate(data, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); + instantiate(data, gauge_dir, steps, verbose_interval, alpha, autotune, tolerance, theta_condition); } #else - void gaugeFixingFFT(GaugeField&, const int, const int, const int, const double, const int, const double, const int) + void gaugeFixingFFT(GaugeField&, const int, const int, const int, const double, const QudaBoolean, const double, const QudaBoolean) { errorQuda("Gauge fixing has bot been built"); } diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu index 93a3521530..e56e5e05c5 100644 --- a/lib/gauge_fix_ovr.cu +++ b/lib/gauge_fix_ovr.cu @@ -223,9 +223,9 @@ namespace quda { }; template - void gaugeFixingOVR(GaugeField &data,const int Nsteps, const int verbose_interval, + void gaugeFixingOVR(GaugeField &data, const int steps, const int verbose_interval, const double relax_boost, const double tolerance, - const int reunit_interval, const int stopWtheta) + const int reunit_interval, const QudaBoolean theta_condition) { TimeProfile profileInternalGaugeFixOVR("InternalGaugeFixQudaOVR", false); @@ -236,8 +236,8 @@ namespace quda { if 
(getVerbosity() >= QUDA_SUMMARIZE) { printfQuda("\tOverrelaxation boost parameter: %e\n", relax_boost); printfQuda("\tTolerance: %le\n", tolerance); - printfQuda("\tStop criterion method: %s\n", stopWtheta ? "Theta" : "Delta"); - printfQuda("\tMaximum number of iterations: %d\n", Nsteps); + printfQuda("\tStop criterion method: %s\n", theta_condition == QUDA_BOOLEAN_TRUE ? "Theta" : "Delta"); + printfQuda("\tMaximum number of iterations: %d\n", steps); printfQuda("\tReunitarize at every %d steps\n", reunit_interval); printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval); } @@ -324,7 +324,7 @@ namespace quda { GaugeFix gfixBorderPoints(data, relax_boost, borderpoints, true, threads); int iter = 0; - for (iter = 0; iter < Nsteps; iter++) { + for (iter = 0; iter < steps; iter++) { for (int p = 0; p < 2; p++) { if (comm_partitioned()) { gfixBorderPoints.setParity(p); //compute border points @@ -414,7 +414,7 @@ namespace quda { double diff = abs(action0 - action); if ((iter % verbose_interval) == (verbose_interval - 1) && getVerbosity() >= QUDA_VERBOSE) printfQuda("Step: %05d\tAction: %.16e\ttheta: %.16e\tDelta: %.16e\n", iter + 1, argQ.getAction(), argQ.getTheta(), diff); - if (stopWtheta) { + if (theta_condition == QUDA_BOOLEAN_TRUE) { if (argQ.getTheta() < tolerance) break; } else { if ( diff < tolerance ) break; @@ -470,15 +470,15 @@ namespace quda { } template struct GaugeFixingOVR { - GaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, - const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta) + GaugeFixingOVR(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, + const double relax_boost, const double tolerance, const int reunit_interval, const QudaBoolean theta_condition) { if (gauge_dir == 4) { if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing...\n"); - gaugeFixingOVR(data, Nsteps, 
verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); + gaugeFixingOVR(data, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition); } else if (gauge_dir == 3) { if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing...\n"); - gaugeFixingOVR(data, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); + gaugeFixingOVR(data, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition); } else { errorQuda("Unexpected gauge_dir = %d", gauge_dir); } @@ -489,21 +489,21 @@ namespace quda { * @brief Gauge fixing with overrelaxation with support for single and multi GPU. * @param[in,out] data, quda gauge field * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] Nsteps, maximum number of steps to perform gauge fixing + * @param[in] steps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
- * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by Nsteps + * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by steps * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value + * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value */ #ifdef GPU_GAUGE_ALG - void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, - const double tolerance, const int reunit_interval, const int stopWtheta) + void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, const double relax_boost, + const double tolerance, const int reunit_interval, const QudaBoolean theta_condition) { - instantiate(data, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta); + instantiate(data, gauge_dir, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition); } #else - void gaugeFixingOVR(GaugeField&, const int, const int, const int, const double, const double, const int, const int) + void gaugeFixingOVR(GaugeField&, const int, const int, const int, const double, const double, const int, const QudaBoolean) { errorQuda("Gauge fixing has not been built"); } diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index dc395a1e57..985c985f9e 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -234,8 +234,7 @@ static TimeProfile profileMomAction("momActionQuda"); static TimeProfile profileEnd("endQuda"); //!< Profiler for GaugeFixing -static TimeProfile profileGaugeFixFFT("gaugeFixFFTQuda"); -static TimeProfile 
profileGaugeFixOVR("gaugeFixOVRQuda"); +static TimeProfile profileGaugeFix("gaugeFixQuda"); //!< Profiler for toal time spend between init and end static TimeProfile profileInit2End("initQuda-endQuda",false); @@ -1547,8 +1546,7 @@ void endQuda(void) profileProject.Print(); profilePhase.Print(); profileMomAction.Print(); - profileGaugeFixOVR.Print(); - profileGaugeFixFFT.Print(); + profileGaugeFix.Print(); profileEnd.Print(); profileInit2End.Print(); @@ -5555,133 +5553,75 @@ void performWFlownStep(unsigned int n_steps, double step_size, int meas_interval popOutputPrefix(); } -int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, - const unsigned int verbose_interval, const double relax_boost, const double tolerance, - const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param, - double *timeinfo) -{ - profileGaugeFixOVR.TPSTART(QUDA_PROFILE_TOTAL); - - checkGaugeParam(param); - - profileGaugeFixOVR.TPSTART(QUDA_PROFILE_INIT); - GaugeFieldParam gParam(*param, gauge); - auto *cpuGauge = new cpuGaugeField(gParam); - - // gParam.pad = getFatLinkPadding(param->X); - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param->type; - gParam.reconstruct = param->reconstruct; - gParam.setPrecision(gParam.Precision(), true); - auto *cudaInGauge = new cudaGaugeField(gParam); - - profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_INIT); - profileGaugeFixOVR.TPSTART(QUDA_PROFILE_H2D); - - cudaInGauge->loadCPUField(*cpuGauge); - - profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_H2D); - - if (comm_size() == 1) { - // perform the update - profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, - stopWtheta); - profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE); - } else { - cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFixOVR); - - // Perform the update - 
profileGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*cudaInGaugeEx, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, - stopWtheta); - profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_COMPUTE); - - copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); - } - - // Copy the gauge field back to the host - profileGaugeFixOVR.TPSTART(QUDA_PROFILE_D2H); - cudaInGauge->saveCPUField(*cpuGauge); - profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_D2H); - - profileGaugeFixOVR.TPSTOP(QUDA_PROFILE_TOTAL); - - if (param->make_resident_gauge) { - if (gaugePrecise != nullptr) delete gaugePrecise; - gaugePrecise = cudaInGauge; - } else { - delete cudaInGauge; - } - - if(timeinfo){ - timeinfo[0] = profileGaugeFixOVR.Last(QUDA_PROFILE_H2D); - timeinfo[1] = profileGaugeFixOVR.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = profileGaugeFixOVR.Last(QUDA_PROFILE_D2H); - } - - return 0; -} - -int computeGaugeFixingFFTQuda(void* gauge, const unsigned int gauge_dir, const unsigned int Nsteps, \ - const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, \ - const unsigned int stopWtheta, QudaGaugeParam* param , double* timeinfo) -{ - profileGaugeFixFFT.TPSTART(QUDA_PROFILE_TOTAL); - - checkGaugeParam(param); - - profileGaugeFixFFT.TPSTART(QUDA_PROFILE_INIT); - - GaugeFieldParam gParam(*param, gauge); - auto *cpuGauge = new cpuGaugeField(gParam); - - //gParam.pad = getFatLinkPadding(param->X); - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param->type; - gParam.reconstruct = param->reconstruct; - gParam.setPrecision(gParam.Precision(), true); - auto *cudaInGauge = new cudaGaugeField(gParam); - - profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_INIT); +int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixParam *fix_param, double *timeinfo) +{ + profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL); - profileGaugeFixFFT.TPSTART(QUDA_PROFILE_H2D); + // Check parameters + 
checkGaugeParam(g_param); + checkGaugeFixParam(fix_param); + // Create host and device fields + profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); + GaugeFieldParam gauge_param(*g_param, gauge); + auto *cpuGauge = new cpuGaugeField(gauge_param); + gauge_param.create = QUDA_NULL_FIELD_CREATE; + gauge_param.link_type = g_param->type; + gauge_param.reconstruct = g_param->reconstruct; + gauge_param.setPrecision(gauge_param.Precision(), true); + auto *cudaInGauge = new cudaGaugeField(gauge_param); + profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); + + // Load gauge to device + profileGaugeFix.TPSTART(QUDA_PROFILE_H2D); cudaInGauge->loadCPUField(*cpuGauge); - - profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_H2D); - - // perform the update - profileGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); - - // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu) - // This ensures that the user can not override alpha autotuning. This is done because - // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of - // alpha, but autotuning alpha ensures optimal behaviour. - // Users who wish to change this behaviour may read the comment in - // lib/gauge_fix_fft.cu to regain control. 
- gaugeFixingFFT(*cudaInGauge, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta); + profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D); - profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_COMPUTE); + // Perform the update + switch(fix_param->fix_type) { + + case QUDA_GAUGEFIX_TYPE_OVR: + if (comm_size() == 1) { + profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); + gaugeFixingOVR(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition); + profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); + } else { + // For MPI, we must perform a halo exchange + cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFix); + profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); + gaugeFixingOVR(*cudaInGaugeEx, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition); + profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); + copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); + } + break; + + case QUDA_GAUGEFIX_TYPE_FFT: + profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); + gaugeFixingFFT(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, fix_param->theta_condition); + profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); + break; + + default: + errorQuda("Unkown gauge fix type %d", fix_param->fix_type); + } - // copy the gauge field back to the host - profileGaugeFixFFT.TPSTART(QUDA_PROFILE_D2H); + // Copy the fixed gauge field back to the host + profileGaugeFix.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); - profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_D2H); - - profileGaugeFixFFT.TPSTOP(QUDA_PROFILE_TOTAL); - - if (param->make_resident_gauge) { + 
profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H); + + profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL); + if (g_param->make_resident_gauge) { if (gaugePrecise != nullptr) delete gaugePrecise; gaugePrecise = cudaInGauge; } else { delete cudaInGauge; } - if (timeinfo) { - timeinfo[0] = profileGaugeFixFFT.Last(QUDA_PROFILE_H2D); - timeinfo[1] = profileGaugeFixFFT.Last(QUDA_PROFILE_COMPUTE); - timeinfo[2] = profileGaugeFixFFT.Last(QUDA_PROFILE_D2H); + if(timeinfo){ + timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D); + timeinfo[1] = profileGaugeFix.Last(QUDA_PROFILE_COMPUTE); + timeinfo[2] = profileGaugeFix.Last(QUDA_PROFILE_D2H); } return 0; diff --git a/lib/milc_interface.cpp b/lib/milc_interface.cpp index 5d7322672f..e752cc0a2f 100644 --- a/lib/milc_interface.cpp +++ b/lib/milc_interface.cpp @@ -2698,15 +2698,23 @@ void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int void qudaGaugeFixingOVR(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double relax_boost, double tolerance, unsigned int reunit_interval, unsigned int stopWtheta, void *milc_sitelink) { - QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim, + QudaGaugeParam gauge_param = newMILCGaugeParam(localDim, (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_SU3_LINKS); - qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_NO; + gauge_param.reconstruct = QUDA_RECONSTRUCT_NO; //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12; + QudaGaugeFixParam fix_param = newQudaGaugeFixParam(); + fix_param.gauge_dir = gauge_dir; + fix_param.maxiter = Nsteps; + fix_param.verbosity_interval = verbose_interval; + fix_param.ovr_relaxation_boost = relax_boost; + fix_param.tolerance = tolerance; + fix_param.reunit_interval = reunit_interval; + fix_param.theta_condition = stopWtheta == 0 ? 
QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; + double timeinfo[3]; - computeGaugeFixingOVRQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta, \ - &qudaGaugeParam, timeinfo); + computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo); printfQuda("Time H2D: %lf\n", timeinfo[0]); printfQuda("Time to Compute: %lf\n", timeinfo[1]); @@ -2725,17 +2733,24 @@ void qudaGaugeFixingFFT( int precision, void* milc_sitelink ) { - QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim, + QudaGaugeParam gauge_param = newMILCGaugeParam(localDim, (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_GENERAL_LINKS); - qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_NO; + gauge_param.reconstruct = QUDA_RECONSTRUCT_NO; //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12; + QudaGaugeFixParam fix_param = newQudaGaugeFixParam(); + fix_param.gauge_dir = gauge_dir; + fix_param.maxiter = Nsteps; + fix_param.verbosity_interval = verbose_interval; + fix_param.fft_alpha = alpha; + fix_param.tolerance = tolerance; + fix_param.theta_condition = stopWtheta == 0 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; + fix_param.fft_autotune = autotune == 0 ? 
QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; double timeinfo[3]; - computeGaugeFixingFFTQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta, \ - &qudaGaugeParam, timeinfo); - + computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo); + printfQuda("Time H2D: %lf\n", timeinfo[0]); printfQuda("Time to Compute: %lf\n", timeinfo[1]); printfQuda("Time D2H: %lf\n", timeinfo[2]); diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index 4afa0598ce..154f6f0d36 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -66,29 +66,35 @@ int gf_maxiter = 10000; int gf_verbosity_interval = 100; double gf_ovr_relaxation_boost = 1.5; double gf_fft_alpha = 0.8; +QudaBoolean gf_fft_autotune = QUDA_BOOLEAN_TRUE; int gf_reunit_interval = 10; double gf_tolerance = 1e-6; -bool gf_theta_condition = false; +QudaBoolean gf_theta_condition = QUDA_BOOLEAN_FALSE; +QudaGaugeFixType fix_type = QUDA_GAUGEFIX_TYPE_OVR; void add_gaugefix_option_group(std::shared_ptr quda_app) { + CLI::TransformPairs fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR}, + {"fft", QUDA_GAUGEFIX_TYPE_FFT}}; + // Option group for gauge fixing related options auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests"); opgroup->add_option("--gf-dir", gf_gauge_dir, - "The orthogonal direction of teh gauge fixing, 3=Coulomb, 4=Landau. (default 4)"); + "The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. 
(default 4)"); opgroup->add_option("--gf-maxiter", gf_maxiter, - "The maximun number of gauge fixing iterations to be applied (default 10000) "); + "The maximun number of gauge fixing iterations to be applied (default 10000)"); opgroup->add_option("--gf-verbosity-interval", gf_verbosity_interval, "Print the gauge fixing progress every N steps (default 100)"); opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost, "The overrelaxation boost parameter for the overrelaxation method (default 1.5)"); opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)"); + opgroup->add_option("--gf-fft-autotune", gf_fft_autotune, "Autotune the Alpha parameter in the FFT method (default true)"); opgroup->add_option("--gf-reunit-interval", gf_reunit_interval, "Reunitarise the gauge field every N steps (default 10)"); opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)"); - opgroup->add_option( - "--gf-theta-condition", gf_theta_condition, - "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)"); + opgroup->add_option("--gf-theta-condition", gf_theta_condition, + "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)"); + opgroup->add_option("--gf-fix-type", fix_type, "The type of algorithm to use for fixing (default ovr)")->transform(CLI::QUDACheckedTransformer(fix_type_map)); } class GaugeAlgTest : public ::testing::Test { @@ -147,9 +153,9 @@ class GaugeAlgTest : public ::testing::Test { { if (execute) { setVerbosity(QUDA_VERBOSE); - param = newQudaGaugeParam(); - + // Setup gauge container. 
+ param = newQudaGaugeParam(); setWilsonGaugeParam(param); param.t_boundary = QUDA_PERIODIC_T; @@ -298,13 +304,7 @@ class GaugeAlgTest : public ::testing::Test { if (execute) { if (!checkDimsPartitioned()) { printfQuda("%s gauge fixing with steepest descent method with FFT\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); - // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu) - // This ensures that the user can not override alpha autotuning. This is done because - // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of - // alpha, but autotuning alpha ensures optimal behaviour. - // Users who wish to change this behaviour may read the comment in - // lib/gauge_fix_fft.cu to regain control. - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance, + gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); auto plaq_gf = plaquette(*U); @@ -393,13 +393,7 @@ TEST_F(GaugeAlgTest, Landau_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFT\n"); - // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu) - // This ensures that the user can not override alpha autotuning. This is done because - // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of - // alpha, but autotuning alpha ensures optimal behaviour. - // Users who wish to change this behaviour may read the comment in - // lib/gauge_fix_fft.cu to regain control. 
- gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance, + gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); @@ -415,13 +409,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n"); - // We hardcode the value of autotune to 1 in the kernel call (lib/gauge_fix_fft.cu) - // This ensures that the user can not override alpha autotuning. This is done because - // it is very easy for the FFT gauge fixing to fail with a poorly chosen value of - // alpha, but autotuning alpha ensures optimal behaviour. - // Users who wish to change this behaviour may read the comment in - // lib/gauge_fix_fft.cu to regain control. - gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, 1, gf_tolerance, + gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); From 5ef51f73c478ea6d90e12c6d5621ac9052cbb530 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 29 Oct 2021 17:44:49 -0700 Subject: [PATCH 21/32] Use host_timer_t in gauge_alg_ctest.cpp --- tests/gauge_alg_ctest.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index 154f6f0d36..6f52545616 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -102,7 +102,7 @@ class GaugeAlgTest : public ::testing::Test { protected: QudaGaugeParam param; - device_timer_t device_timer_1, device_timer_2; + host_timer_t host_timer_1, host_timer_2; double2 detu; double3 plaq; cudaGaugeField *U; @@ -164,7 +164,7 @@ class GaugeAlgTest : public ::testing::Test { int 
*num_failures_d = (int *)get_mapped_device_pointer(num_failures_h); SetReunitarizationConsts(); - device_timer_1.start(); + host_timer_1.start(); // If no field is loaded, create a physical quenched field on the device if (!gauge_load) { @@ -187,7 +187,7 @@ class GaugeAlgTest : public ::testing::Test { novrsteps = heatbath_num_overrelax_per_step; coldstart = heatbath_coldstart; beta_value = heatbath_beta_value; - device_timer_2.start(); + host_timer_2.start(); if (coldstart) InitGaugeField(*U); @@ -207,8 +207,8 @@ class GaugeAlgTest : public ::testing::Test { printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); } - device_timer_2.stop(); - printfQuda("Time Monte -> %.6f s\n", device_timer_2.last()); + host_timer_2.stop(); + printfQuda("Time Monte -> %.6f s\n", host_timer_2.last()); } else { // If a field is loaded, create a device field and copy @@ -276,8 +276,8 @@ class GaugeAlgTest : public ::testing::Test { // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); - device_timer_1.stop(); - printfQuda("Time -> %.6f s\n", device_timer_1.last()); + host_timer_1.stop(); + printfQuda("Time -> %.6f s\n", host_timer_1.last()); } // If we performed a specific instance, switch off the // Google testing. 
From 97b4c0b55924d533306ac18d0a98c45ea953418e Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 29 Oct 2021 17:45:32 -0700 Subject: [PATCH 22/32] Clang tidy --- include/enum_quda_fortran.h | 1 - include/gauge_tools.h | 9 ++++-- include/quda.h | 13 ++++---- lib/check_params.h | 1 - lib/interface_quda.cpp | 30 ++++++++++-------- lib/milc_interface.cpp | 14 ++++----- tests/gauge_alg_ctest.cpp | 61 +++++++++++++++++++------------------ tests/su3_test.cpp | 2 +- 8 files changed, 69 insertions(+), 62 deletions(-) diff --git a/include/enum_quda_fortran.h b/include/enum_quda_fortran.h index 7614e2bbdc..9207e265af 100644 --- a/include/enum_quda_fortran.h +++ b/include/enum_quda_fortran.h @@ -503,7 +503,6 @@ #define QUDA_GAUGEFIX_TYPE_FFT 1 #define QUDA_GAUGEFIX_TYPE_INVALID QUDA_INVALID_ENUM - #define QudaExtLibType integer(4) #define QUDA_CUSOLVE_EXTLIB 0 #define QUDA_EIGEN_EXTLIB 1 diff --git a/include/gauge_tools.h b/include/gauge_tools.h index 25b1691195..e067f1b7f2 100644 --- a/include/gauge_tools.h +++ b/include/gauge_tools.h @@ -124,7 +124,8 @@ namespace quda * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value */ void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval, - const double relax_boost, const double tolerance, const int reunit_interval, const QudaBoolean theta_condition); + const double relax_boost, const double tolerance, const int reunit_interval, + const QudaBoolean theta_condition); /** * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. 
@@ -133,14 +134,16 @@ namespace quda * @param[in] steps, maximum number of steps to perform gauge fixing * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune, QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we decrease the alpha value + * @param[in] autotune, QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we + * decrease the alpha value * @param[in] tolerance, torelance value to stop the method, if this * value is zero then the method stops when iteration reachs the * maximum number of steps defined by steps * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value */ void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval, - const double alpha, const QudaBoolean autotune, const double tolerance, const QudaBoolean theta_condition); + const double alpha, const QudaBoolean autotune, const double tolerance, + const QudaBoolean theta_condition); /** @brief Compute the Fmunu tensor diff --git a/include/quda.h b/include/quda.h index 3401cc4f67..e4c34e0029 100644 --- a/include/quda.h +++ b/include/quda.h @@ -791,10 +791,10 @@ extern "C" { QudaBoolean fft_autotune; /**< Autotune the Alpha parameter in the FFT method (default true) */ int reunit_interval; /**< Reunitarise the gauge field every N steps (default 10) */ double tolerance; /**< The tolerance of the gauge fixing quality (default 1e-6) */ - QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)" */ + QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. 
If false, use the + delta value (default false)" */ } QudaGaugeFixParam; - /* * Interface functions, found in interface_quda.cpp */ @@ -1021,7 +1021,7 @@ extern "C" { * @param param The QudaGaugeFixParam whose elements we are to print. */ void printQudaGaugeFixParam(QudaGaugeFixParam *param); - + /** * Load the gauge field from the host. * @param h_gauge Base pointer to host gauge field (regardless of dimensionality) @@ -1536,14 +1536,15 @@ extern "C" { const int *X); /** - * @brief Gauge fixing with overrelaxation with support for single and multi GPU, and steepest descent FFT with support for single GPU only. + * @brief Gauge fixing with overrelaxation with support for single and multi GPU, and steepest descent FFT with + * support for single GPU only. * @param[in,out] gauge, gauge field to be fixed * @param[in] gauge_param The parameters of the external fields and the computation settings * @param[in] fix_param Container for the gauge fixing algorithm and parameters to use. - * @param[out] timeinfo Array to track timings + * @param[out] timeinfo Array to track timings */ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *gauge_param, QudaGaugeFixParam *fix_param, double *timeinfo); - + /** * @brief Strided Batched GEMM * @param[in] arrayA The array containing the A matrix data diff --git a/lib/check_params.h b/lib/check_params.h index 28e8370fa6..be21e3c8fb 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -1092,7 +1092,6 @@ void printQudaGaugeFixParam(QudaGaugeFixParam *param) #endif } - // clean up #undef INVALID_INT #undef INVALID_DOUBLE diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 985c985f9e..9829634ac4 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5554,7 +5554,7 @@ void performWFlownStep(unsigned int n_steps, double step_size, int meas_interval } int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixParam *fix_param, double *timeinfo) -{ +{ 
profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL); // Check parameters @@ -5576,40 +5576,44 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar profileGaugeFix.TPSTART(QUDA_PROFILE_H2D); cudaInGauge->loadCPUField(*cpuGauge); profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D); - + // Perform the update - switch(fix_param->fix_type) { - + switch (fix_param->fix_type) { + case QUDA_GAUGEFIX_TYPE_OVR: if (comm_size() == 1) { profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition); + gaugeFixingOVR(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, + fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, + fix_param->theta_condition); profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); } else { // For MPI, we must perform a halo exchange cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFix); profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*cudaInGaugeEx, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition); + gaugeFixingOVR(*cudaInGaugeEx, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, + fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, + fix_param->theta_condition); profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); } break; - + case QUDA_GAUGEFIX_TYPE_FFT: profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingFFT(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, 
fix_param->theta_condition); + gaugeFixingFFT(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, + fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, fix_param->theta_condition); profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); break; - - default: - errorQuda("Unkown gauge fix type %d", fix_param->fix_type); + + default: errorQuda("Unkown gauge fix type %d", fix_param->fix_type); } - + // Copy the fixed gauge field back to the host profileGaugeFix.TPSTART(QUDA_PROFILE_D2H); cudaInGauge->saveCPUField(*cpuGauge); profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H); - + profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL); if (g_param->make_resident_gauge) { if (gaugePrecise != nullptr) delete gaugePrecise; diff --git a/lib/milc_interface.cpp b/lib/milc_interface.cpp index e752cc0a2f..3cac52afcf 100644 --- a/lib/milc_interface.cpp +++ b/lib/milc_interface.cpp @@ -2698,9 +2698,8 @@ void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int void qudaGaugeFixingOVR(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double relax_boost, double tolerance, unsigned int reunit_interval, unsigned int stopWtheta, void *milc_sitelink) { - QudaGaugeParam gauge_param = newMILCGaugeParam(localDim, - (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, - QUDA_SU3_LINKS); + QudaGaugeParam gauge_param + = newMILCGaugeParam(localDim, (precision == 1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_SU3_LINKS); gauge_param.reconstruct = QUDA_RECONSTRUCT_NO; //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12; @@ -2712,7 +2711,7 @@ void qudaGaugeFixingOVR(int precision, unsigned int gauge_dir, int Nsteps, int v fix_param.tolerance = tolerance; fix_param.reunit_interval = reunit_interval; fix_param.theta_condition = stopWtheta == 0 ? 
QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; - + double timeinfo[3]; computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo); @@ -2733,9 +2732,8 @@ void qudaGaugeFixingFFT( int precision, void* milc_sitelink ) { - QudaGaugeParam gauge_param = newMILCGaugeParam(localDim, - (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, - QUDA_GENERAL_LINKS); + QudaGaugeParam gauge_param + = newMILCGaugeParam(localDim, (precision == 1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION, QUDA_GENERAL_LINKS); gauge_param.reconstruct = QUDA_RECONSTRUCT_NO; //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12; @@ -2750,7 +2748,7 @@ void qudaGaugeFixingFFT( int precision, double timeinfo[3]; computeGaugeFixingQuda(milc_sitelink, &gauge_param, &fix_param, timeinfo); - + printfQuda("Time H2D: %lf\n", timeinfo[0]); printfQuda("Time to Compute: %lf\n", timeinfo[1]); printfQuda("Time D2H: %lf\n", timeinfo[2]); diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_ctest.cpp index 6f52545616..a1e3c60507 100644 --- a/tests/gauge_alg_ctest.cpp +++ b/tests/gauge_alg_ctest.cpp @@ -74,9 +74,8 @@ QudaGaugeFixType fix_type = QUDA_GAUGEFIX_TYPE_OVR; void add_gaugefix_option_group(std::shared_ptr quda_app) { - CLI::TransformPairs fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR}, - {"fft", QUDA_GAUGEFIX_TYPE_FFT}}; - + CLI::TransformPairs fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR}, {"fft", QUDA_GAUGEFIX_TYPE_FFT}}; + // Option group for gauge fixing related options auto opgroup = quda_app->add_option_group("gaugefix", "Options controlling gauge fixing tests"); opgroup->add_option("--gf-dir", gf_gauge_dir, @@ -88,16 +87,20 @@ void add_gaugefix_option_group(std::shared_ptr quda_app) opgroup->add_option("--gf-ovr-relaxation-boost", gf_ovr_relaxation_boost, "The overrelaxation boost parameter for the overrelaxation method (default 1.5)"); opgroup->add_option("--gf-fft-alpha", gf_fft_alpha, "The Alpha parameter in the FFT method (default 0.8)"); - 
opgroup->add_option("--gf-fft-autotune", gf_fft_autotune, "Autotune the Alpha parameter in the FFT method (default true)"); + opgroup->add_option("--gf-fft-autotune", gf_fft_autotune, + "Autotune the Alpha parameter in the FFT method (default true)"); opgroup->add_option("--gf-reunit-interval", gf_reunit_interval, "Reunitarise the gauge field every N steps (default 10)"); opgroup->add_option("--gf-tol", gf_tolerance, "The tolerance of the gauge fixing quality (default 1e-6)"); - opgroup->add_option("--gf-theta-condition", gf_theta_condition, - "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)"); - opgroup->add_option("--gf-fix-type", fix_type, "The type of algorithm to use for fixing (default ovr)")->transform(CLI::QUDACheckedTransformer(fix_type_map)); + opgroup->add_option( + "--gf-theta-condition", gf_theta_condition, + "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)"); + opgroup->add_option("--gf-fix-type", fix_type, "The type of algorithm to use for fixing (default ovr)") + ->transform(CLI::QUDACheckedTransformer(fix_type_map)); } -class GaugeAlgTest : public ::testing::Test { +class GaugeAlgTest : public ::testing::Test +{ protected: QudaGaugeParam param; @@ -111,19 +114,17 @@ class GaugeAlgTest : public ::testing::Test { int novrsteps; bool coldstart; double beta_value; - RNG * randstates; - - void SetReunitarizationConsts(){ + RNG *randstates; + + void SetReunitarizationConsts() + { const double unitarize_eps = 1e-14; const double max_error = 1e-10; const int reunit_allow_svd = 1; - const int reunit_svd_only = 0; + const int reunit_svd_only = 0; const double svd_rel_error = 1e-6; const double svd_abs_error = 1e-6; - setUnitarizeLinksConstants(unitarize_eps, max_error, - reunit_allow_svd, reunit_svd_only, - svd_rel_error, svd_abs_error); - + setUnitarizeLinksConstants(unitarize_eps, max_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, 
svd_abs_error); } bool checkDimsPartitioned() @@ -133,8 +134,9 @@ class GaugeAlgTest : public ::testing::Test { return false; } - bool comparePlaquette(double3 a, double3 b){ - double a0,a1,a2; + bool comparePlaquette(double3 a, double3 b) + { + double a0, a1, a2; a0 = std::abs(a.x - b.x); a1 = std::abs(a.y - b.y); a2 = std::abs(a.z - b.z); @@ -143,7 +145,8 @@ class GaugeAlgTest : public ::testing::Test { return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } - bool CheckDeterminant(double2 detu){ + bool CheckDeterminant(double2 detu) + { double prec_val = 5e-8; if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2; return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val); @@ -153,7 +156,7 @@ class GaugeAlgTest : public ::testing::Test { { if (execute) { setVerbosity(QUDA_VERBOSE); - + // Setup gauge container. param = newQudaGaugeParam(); setWilsonGaugeParam(param); @@ -236,7 +239,7 @@ class GaugeAlgTest : public ::testing::Test { U = new cudaGaugeField(gauge_field_param); U->copy(*host); } - + delete host; // Reunitarization @@ -259,7 +262,7 @@ class GaugeAlgTest : public ::testing::Test { case 2: run_fft(); break; default: errorQuda("Invalid test type %d ", test_type); } - + host_free(num_failures_h); } } @@ -283,11 +286,11 @@ class GaugeAlgTest : public ::testing::Test { // Google testing. if (test_type != 0) execute = false; } - + virtual void run_ovr() { if (execute) { - printfQuda("%s gauge fixing with overrelaxation method\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); + printfQuda("%s gauge fixing with overrelaxation method\n", gf_gauge_dir == 4 ? 
"Landau" : "Coulomb"); gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, gf_theta_condition); auto plaq_gf = plaquette(*U); @@ -304,7 +307,7 @@ class GaugeAlgTest : public ::testing::Test { if (execute) { if (!checkDimsPartitioned()) { printfQuda("%s gauge fixing with steepest descent method with FFT\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); auto plaq_gf = plaquette(*U); @@ -411,7 +414,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n"); gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, gf_theta_condition); -auto plaq_gf = plaquette(*U); + auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); @@ -425,7 +428,7 @@ int main(int argc, char **argv) // initalize google test, includes command line options ::testing::InitGoogleTest(&argc, argv); - // command line options + // command line options auto app = make_app(); add_gaugefix_option_group(app); add_heatbath_option_group(app); @@ -488,6 +491,6 @@ int main(int argc, char **argv) endQuda(); finalizeComms(); - - return test_rc; + + return test_rc; } diff --git a/tests/su3_test.cpp b/tests/su3_test.cpp index 9f166b6ca6..44b12bb528 100644 --- a/tests/su3_test.cpp +++ b/tests/su3_test.cpp @@ -94,7 +94,7 @@ void add_su3_option_group(std::shared_ptr quda_app) opgroup->add_option("--su3-wflow-type", wflow_type, "The type of action to use in the wilson flow (default wilson)") 
->transform(CLI::QUDACheckedTransformer(wflow_type_map)); - + opgroup->add_option("--su3-measurement-interval", measurement_interval, "Measure the field energy and topological charge every Nth step (default 5) "); } From c7801b1c70130696be18e78c5b6a40dfe41eea49 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Fri, 29 Oct 2021 20:37:19 -0700 Subject: [PATCH 23/32] typo in Doxygen comment --- include/quda.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/quda.h b/include/quda.h index e4c34e0029..7650bf38be 100644 --- a/include/quda.h +++ b/include/quda.h @@ -784,7 +784,7 @@ extern "C" { QudaGaugeFixType fix_type; /**< The aglorithm to use for gauge fixing */ int gauge_dir; /**< The orthogonal direction of the gauge fixing, 3=Coulomb, 4=Landau. (default 4) */ - int maxiter; /**< The maximun number of gauge fixing iterations to be applied (default 10000) */ + int maxiter; /**< The maximum number of gauge fixing iterations to be applied (default 10000) */ int verbosity_interval; /**< Print the gauge fixing progress every N steps (default 100) */ double ovr_relaxation_boost; /**< The overrelaxation boost parameter for the overrelaxation method (default 1.5) */ double fft_alpha; /**< The Alpha parameter in the FFT method (default 0.8) */ From 35ee71b65ccb8d905bc63648bf72c52711c35274 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Mon, 1 Nov 2021 15:22:10 -0700 Subject: [PATCH 24/32] Rename gauge_alg to original --- tests/CMakeLists.txt | 10 +++++----- tests/{gauge_alg_ctest.cpp => gauge_alg_test.cpp} | 0 2 files changed, 5 insertions(+), 5 deletions(-) rename tests/{gauge_alg_ctest.cpp => gauge_alg_test.cpp} (100%) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4beabb3d81..dcd7c8fa4b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -223,10 +223,10 @@ if(QUDA_FORCE_GAUGE) endif() if(QUDA_GAUGE_ALG) - add_executable(gauge_alg_ctest gauge_alg_ctest.cpp) - target_link_libraries(gauge_alg_ctest ${TEST_LIBS}) - 
quda_checkbuildtest(gauge_alg_ctest QUDA_BUILD_ALL_TESTS) - install(TARGETS gauge_alg_ctest ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_executable(gauge_alg_test gauge_alg_test.cpp) + target_link_libraries(gauge_alg_test ${TEST_LIBS}) + quda_checkbuildtest(gauge_alg_test QUDA_BUILD_ALL_TESTS) + install(TARGETS gauge_alg_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) add_executable(heatbath_test heatbath_test.cpp) target_link_libraries(heatbath_test ${TEST_LIBS}) @@ -812,7 +812,7 @@ foreach(prec IN LISTS TEST_PRECS) if(QUDA_GAUGE_ALG) add_test(NAME gauge_alg_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg_ctest> ${MPIEXEC_POSTFLAGS} + COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:gauge_alg_test> ${MPIEXEC_POSTFLAGS} --dim 4 6 8 10 --prec ${prec} --gtest_output=xml:gauge_arg_test_${prec}.xml) endif() diff --git a/tests/gauge_alg_ctest.cpp b/tests/gauge_alg_test.cpp similarity index 100% rename from tests/gauge_alg_ctest.cpp rename to tests/gauge_alg_test.cpp From 46bc1a01cc441458f9fbf018b026a4227d3ed922 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Thu, 16 Dec 2021 14:07:51 -0800 Subject: [PATCH 25/32] Add support for host and device gauge input --- lib/check_params.h | 15 +--- lib/interface_quda.cpp | 82 +++++++++++------- tests/gauge_alg_test.cpp | 167 +++++++++++++++++++------------------ tests/utils/host_utils.cpp | 1 + tests/utils/misc.cpp | 13 +++ tests/utils/misc.h | 1 + 6 files changed, 158 insertions(+), 121 deletions(-) diff --git a/lib/check_params.h b/lib/check_params.h index be21e3c8fb..1e09c901be 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -1065,17 +1065,6 @@ void printQudaGaugeFixParam(QudaGaugeFixParam *param) P(struct_size, (size_t)INVALID_INT); #endif -#ifdef INIT_PARAM - P(gauge_dir, 4); - P(maxiter, 10000); - P(verbosity_interval, 100); - P(reunit_interval, 10); - P(ovr_relaxation_boost, 0.0); - P(fft_alpha, 0.0); - P(tolerance, 0.0); - P(fft_autotune, QUDA_BOOLEAN_FALSE); - P(theta_condition, QUDA_BOOLEAN_FALSE); -#else 
P(gauge_dir, INVALID_INT); P(maxiter, INVALID_INT); P(verbosity_interval, INVALID_INT); @@ -1083,10 +1072,12 @@ void printQudaGaugeFixParam(QudaGaugeFixParam *param) P(ovr_relaxation_boost, INVALID_DOUBLE); P(fft_alpha, INVALID_DOUBLE); P(tolerance, INVALID_DOUBLE); + +#ifndef CHECK_PARAM P(fft_autotune, QUDA_BOOLEAN_FALSE); P(theta_condition, QUDA_BOOLEAN_FALSE); #endif - + #ifdef INIT_PARAM return ret; #endif diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 9829634ac4..d8ae3d21e9 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5557,51 +5557,73 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar { profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL); + if (!initialized) errorQuda("QUDA not initialized"); + printQudaGaugeParam(g_param); + printQudaGaugeFixParam(fix_param); + // Check parameters checkGaugeParam(g_param); checkGaugeFixParam(fix_param); + cudaGaugeField *device_gauge = nullptr; + cpuGaugeField *cpu_gauge = nullptr; + // Create host and device fields - profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); - GaugeFieldParam gauge_param(*g_param, gauge); - auto *cpuGauge = new cpuGaugeField(gauge_param); - gauge_param.create = QUDA_NULL_FIELD_CREATE; - gauge_param.link_type = g_param->type; - gauge_param.reconstruct = g_param->reconstruct; - gauge_param.setPrecision(gauge_param.Precision(), true); - auto *cudaInGauge = new cudaGaugeField(gauge_param); - profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); - - // Load gauge to device - profileGaugeFix.TPSTART(QUDA_PROFILE_H2D); - cudaInGauge->loadCPUField(*cpuGauge); - profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D); - + if(g_param->location == QUDA_CPU_FIELD_LOCATION) { + // The gauge field is on the CPU. We must + // create a GPU gauge and transfer. 
+ profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); + GaugeFieldParam gauge_param(*g_param, gauge); + cpu_gauge = new cpuGaugeField(gauge_param); + gauge_param.create = QUDA_NULL_FIELD_CREATE; + gauge_param.link_type = g_param->type; + gauge_param.reconstruct = g_param->reconstruct; + gauge_param.setPrecision(gauge_param.Precision(), true); + device_gauge = new cudaGaugeField(gauge_param); + profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); + + // Load gauge to device + profileGaugeFix.TPSTART(QUDA_PROFILE_H2D); + device_gauge->loadCPUField(*cpu_gauge); + profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D); + } else { + // The gauge field is on the GPU already, so + // we can just reference that field. + profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); + GaugeFieldParam gauge_param(*g_param, gauge); + gauge_param.create = QUDA_REFERENCE_FIELD_CREATE; + gauge_param.link_type = g_param->type; + gauge_param.reconstruct = g_param->reconstruct; + gauge_param.setPrecision(gauge_param.Precision(), true); + device_gauge = new cudaGaugeField(gauge_param); + profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); + } + // Perform the update switch (fix_param->fix_type) { case QUDA_GAUGEFIX_TYPE_OVR: if (comm_size() == 1) { profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, + gaugeFixingOVR(*device_gauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition); profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); } else { // For MPI, we must perform a halo exchange - cudaGaugeField *cudaInGaugeEx = createExtendedGauge(*cudaInGauge, R, profileGaugeFix); + cudaGaugeField *device_gauge_extended = createExtendedGauge(*device_gauge, R, profileGaugeFix); profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*cudaInGaugeEx, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, + 
gaugeFixingOVR(*device_gauge_extended, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, fix_param->theta_condition); profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); - copyExtendedGauge(*cudaInGauge, *cudaInGaugeEx, QUDA_CUDA_FIELD_LOCATION); + copyExtendedGauge(*device_gauge, *device_gauge_extended, QUDA_CUDA_FIELD_LOCATION); } break; case QUDA_GAUGEFIX_TYPE_FFT: profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingFFT(*cudaInGauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, + gaugeFixingFFT(*device_gauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, fix_param->theta_condition); profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); break; @@ -5609,19 +5631,23 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar default: errorQuda("Unkown gauge fix type %d", fix_param->fix_type); } - // Copy the fixed gauge field back to the host - profileGaugeFix.TPSTART(QUDA_PROFILE_D2H); - cudaInGauge->saveCPUField(*cpuGauge); - profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H); - + // Copy the fixed gauge field back to the host if it came + // from the host + if(g_param->location == QUDA_CPU_FIELD_LOCATION) { + profileGaugeFix.TPSTART(QUDA_PROFILE_D2H); + device_gauge->saveCPUField(*cpu_gauge); + profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H); + } + profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL); + if (g_param->make_resident_gauge) { if (gaugePrecise != nullptr) delete gaugePrecise; - gaugePrecise = cudaInGauge; + gaugePrecise = device_gauge; } else { - delete cudaInGauge; + delete device_gauge; } - + if(timeinfo){ timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D); timeinfo[1] = profileGaugeFix.Last(QUDA_PROFILE_COMPUTE); diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index a1e3c60507..a0a068d8a9 100644 --- 
a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -66,11 +66,11 @@ int gf_maxiter = 10000; int gf_verbosity_interval = 100; double gf_ovr_relaxation_boost = 1.5; double gf_fft_alpha = 0.8; -QudaBoolean gf_fft_autotune = QUDA_BOOLEAN_TRUE; +bool gf_fft_autotune = true; int gf_reunit_interval = 10; double gf_tolerance = 1e-6; -QudaBoolean gf_theta_condition = QUDA_BOOLEAN_FALSE; -QudaGaugeFixType fix_type = QUDA_GAUGEFIX_TYPE_OVR; +bool gf_theta_condition = false; +QudaGaugeFixType gf_fix_type = QUDA_GAUGEFIX_TYPE_OVR; void add_gaugefix_option_group(std::shared_ptr quda_app) { @@ -95,15 +95,29 @@ void add_gaugefix_option_group(std::shared_ptr quda_app) opgroup->add_option( "--gf-theta-condition", gf_theta_condition, "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)"); - opgroup->add_option("--gf-fix-type", fix_type, "The type of algorithm to use for fixing (default ovr)") + opgroup->add_option("--gf-fix-type", gf_fix_type, "The type of algorithm to use for fixing (default ovr)") ->transform(CLI::QUDACheckedTransformer(fix_type_map)); } +void setGaugeFixParam(QudaGaugeFixParam &fix_param) { + fix_param.fix_type = gf_fix_type; + fix_param.gauge_dir = gf_gauge_dir; + fix_param.maxiter = gf_maxiter; + fix_param.verbosity_interval = gf_verbosity_interval; + fix_param.reunit_interval = gf_reunit_interval; + fix_param.tolerance = gf_tolerance; + fix_param.ovr_relaxation_boost = gf_ovr_relaxation_boost; + fix_param.fft_alpha = gf_fft_alpha; + fix_param.fft_autotune = gf_fft_autotune ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + fix_param.theta_condition = gf_theta_condition ? 
QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; +} + class GaugeAlgTest : public ::testing::Test { protected: - QudaGaugeParam param; + QudaGaugeParam gauge_param; + QudaGaugeFixParam fix_param; host_timer_t host_timer_1, host_timer_2; double2 detu; @@ -155,12 +169,11 @@ class GaugeAlgTest : public ::testing::Test virtual void SetUp() { if (execute) { - setVerbosity(QUDA_VERBOSE); - + // Setup gauge container. - param = newQudaGaugeParam(); - setWilsonGaugeParam(param); - param.t_boundary = QUDA_PERIODIC_T; + gauge_param = newQudaGaugeParam(); + setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; // Reunitarization setup int *num_failures_h = (int *)mapped_malloc(sizeof(int)); @@ -171,17 +184,17 @@ class GaugeAlgTest : public ::testing::Test // If no field is loaded, create a physical quenched field on the device if (!gauge_load) { - GaugeFieldParam gParam(param); - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.reconstruct = link_recon; - gParam.setPrecision(prec, true); + GaugeFieldParam device_gauge_param(gauge_param); + device_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED; + device_gauge_param.create = QUDA_NULL_FIELD_CREATE; + device_gauge_param.reconstruct = link_recon; + device_gauge_param.setPrecision(prec, true); for (int d = 0; d < 4; d++) { - if (comm_dim_partitioned(d)) gParam.r[d] = 2; - gParam.x[d] += 2 * gParam.r[d]; + if (comm_dim_partitioned(d)) device_gauge_param.r[d] = 2; + device_gauge_param.x[d] += 2 * device_gauge_param.r[d]; } - U = new cudaGaugeField(gParam); + U = new cudaGaugeField(device_gauge_param); RNG randstates(*U, 1234); @@ -191,12 +204,10 @@ class GaugeAlgTest : public ::testing::Test coldstart = heatbath_coldstart; beta_value = heatbath_beta_value; host_timer_2.start(); - - if (coldstart) - InitGaugeField(*U); - else - InitGaugeField(*U, randstates); - + + if (coldstart) InitGaugeField(*U); + else InitGaugeField(*U, randstates); + for (int step = 
1; step <= nsteps; ++step) { printfQuda("Step %d\n", step); Monte(*U, randstates, beta_value, nhbsteps, novrsteps); @@ -216,27 +227,27 @@ class GaugeAlgTest : public ::testing::Test // If a field is loaded, create a device field and copy printfQuda("Copying gauge field from host\n"); - param.location = QUDA_CPU_FIELD_LOCATION; - GaugeFieldParam gauge_field_param(param, host_gauge); - gauge_field_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - GaugeField *host = GaugeField::Create(gauge_field_param); + gauge_param.location = QUDA_CPU_FIELD_LOCATION; + GaugeFieldParam host_gauge_param(gauge_param, host_gauge); + host_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + GaugeField *host = GaugeField::Create(host_gauge_param); // switch the parameters for creating the mirror precise cuda gauge field - gauge_field_param.create = QUDA_NULL_FIELD_CREATE; - gauge_field_param.reconstruct = param.reconstruct; - gauge_field_param.setPrecision(param.cuda_prec, true); + host_gauge_param.create = QUDA_NULL_FIELD_CREATE; + host_gauge_param.reconstruct = gauge_param.reconstruct; + host_gauge_param.setPrecision(gauge_param.cuda_prec, true); if (comm_partitioned()) { int R[4] = {0, 0, 0, 0}; for (int d = 0; d < 4; d++) if (comm_dim_partitioned(d)) R[d] = 2; static TimeProfile GaugeFix("GaugeFix"); - cudaGaugeField *tmp = new cudaGaugeField(gauge_field_param); + cudaGaugeField *tmp = new cudaGaugeField(host_gauge_param); tmp->copy(*host); U = createExtendedGauge(*tmp, R, GaugeFix); delete tmp; } else { - U = new cudaGaugeField(gauge_field_param); + U = new cudaGaugeField(host_gauge_param); U->copy(*host); } @@ -258,8 +269,8 @@ class GaugeAlgTest : public ::testing::Test case 0: // Do the Google testing break; - case 1: run_ovr(); break; - case 2: run_fft(); break; + case 1: run(); break; + //case 2: run_fft(); break; default: errorQuda("Invalid test type %d ", test_type); } @@ -287,12 +298,24 @@ class GaugeAlgTest : public ::testing::Test if (test_type != 0) execute = false; } - 
virtual void run_ovr() + virtual void run() { if (execute) { - printfQuda("%s gauge fixing with overrelaxation method\n", gf_gauge_dir == 4 ? "Landau" : "Coulomb"); - gaugeFixingOVR(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, - gf_reunit_interval, gf_theta_condition); + // Set gauge fixing params from the command line + fix_param = newQudaGaugeFixParam(); + setGaugeFixParam(fix_param); + + // Setup gauge container. + gauge_param = newQudaGaugeParam(); + setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; + gauge_param.location = QUDA_CUDA_FIELD_LOCATION; + + //GaugeFieldParam param(*U); + printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", get_gaugefix_str(fix_param.fix_type)); + + computeGaugeFixingQuda(U->Gauge_p(), &gauge_param, &fix_param, nullptr); + auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); @@ -302,46 +325,26 @@ class GaugeAlgTest : public ::testing::Test if (gauge_store) save_gauge(); } } - virtual void run_fft() - { - if (execute) { - if (!checkDimsPartitioned()) { - printfQuda("%s gauge fixing with steepest descent method with FFT\n", gf_gauge_dir == 4 ? 
"Landau" : "Coulomb"); - gaugeFixingFFT(*U, gf_gauge_dir, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); - - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); - // Save if output string is specified - if (gauge_store) save_gauge(); - } else { - errorQuda("Cannot perform FFT gauge fixing with MPI partitions."); - } - } - } - + virtual void save_gauge() { printfQuda("Saving the gauge field to file %s\n", gauge_outfile); - QudaGaugeParam gauge_param = newQudaGaugeParam(); - setWilsonGaugeParam(gauge_param); + //QudaGaugeParam gauge_param = newQudaGaugeParam(); + //setWilsonGaugeParam(gauge_param); void *cpu_gauge[4]; for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); } - GaugeFieldParam gParam(param); - gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - gParam.create = QUDA_NULL_FIELD_CREATE; - gParam.link_type = param.type; - gParam.reconstruct = param.reconstruct; - gParam.setPrecision(gParam.Precision(), true); + GaugeFieldParam param(gauge_param); + param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + param.create = QUDA_NULL_FIELD_CREATE; + param.link_type = gauge_param.type; + param.reconstruct = gauge_param.reconstruct; + param.setPrecision(param.Precision(), true); cudaGaugeField *gauge; - gauge = new cudaGaugeField(gParam); + gauge = new cudaGaugeField(param); // copy into regular field copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); @@ -367,8 +370,8 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) { if (execute) { printfQuda("Landau gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); + //gaugeFixingOVR(*U, 4, gf_maxiter, 
gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + //gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); @@ -381,8 +384,8 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { if (execute) { printfQuda("Coulomb gauge fixing with overrelaxation\n"); - gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - gf_theta_condition); + //gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, + //gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); @@ -396,8 +399,8 @@ TEST_F(GaugeAlgTest, Landau_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFT\n"); - gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); + //gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + //gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); @@ -412,8 +415,8 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n"); - gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - gf_theta_condition); + //gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, + //gf_theta_condition); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, 
plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); @@ -445,14 +448,16 @@ int main(int argc, char **argv) // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); - + QudaGaugeParam gauge_param = newQudaGaugeParam(); - if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; - if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - + setVerbosity(QUDA_VERBOSE); + setQudaPrecisions(); setWilsonGaugeParam(gauge_param); setDims(gauge_param.X); + //if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + //if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + display_test_info(); gauge_load = strcmp(latfile, ""); diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index c07d0bcacb..d556db0037 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -69,6 +69,7 @@ void setQudaPrecisions() if (prec_eigensolver == QUDA_INVALID_PRECISION) prec_eigensolver = prec_sloppy; if (prec_precondition == QUDA_INVALID_PRECISION) prec_precondition = prec_sloppy; if (prec_null == QUDA_INVALID_PRECISION) prec_null = prec_precondition; + if (prec_refinement_sloppy == QUDA_INVALID_PRECISION) prec_refinement_sloppy = prec_precondition; if (smoother_halo_prec == QUDA_INVALID_PRECISION) smoother_halo_prec = prec_null; if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; if (link_recon_precondition == QUDA_RECONSTRUCT_INVALID) link_recon_precondition = link_recon_sloppy; diff --git a/tests/utils/misc.cpp b/tests/utils/misc.cpp index 58cde21a8f..e8fbd5e5d8 100644 --- a/tests/utils/misc.cpp +++ b/tests/utils/misc.cpp @@ -151,6 +151,19 @@ const char *get_contract_str(QudaContractType type) return ret; } +const char *get_gaugefix_str(QudaGaugeFixType type) +{ + const char *ret; + + switch (type) { + case QUDA_GAUGEFIX_TYPE_OVR: ret = "Overrelaxation"; break; + 
case QUDA_GAUGEFIX_TYPE_FFT: ret = "FFT"; break; + default: ret = "unknown"; break; + } + + return ret; +} + const char *get_eig_spectrum_str(QudaEigSpectrumType type) { const char *ret; diff --git a/tests/utils/misc.h b/tests/utils/misc.h index 5c35480840..bc4c8a2e4b 100644 --- a/tests/utils/misc.h +++ b/tests/utils/misc.h @@ -21,6 +21,7 @@ const char *get_eig_type_str(QudaEigType type); const char *get_ritz_location_str(QudaFieldLocation type); const char *get_memory_type_str(QudaMemoryType type); const char *get_contract_str(QudaContractType type); +const char *get_gaugefix_str(QudaGaugeFixType type); #define XUP 0 #define YUP 1 From 69752679f16e729ed80053c468f914fc258559ee Mon Sep 17 00:00:00 2001 From: cpviolator Date: Thu, 16 Dec 2021 17:45:02 -0800 Subject: [PATCH 26/32] Use gauge fix param for GPU fix function. Need to fix interface for CPU/MILC --- include/gauge_tools.h | 31 +++----------- include/quda.h | 2 +- lib/check_params.h | 2 +- lib/gauge_fix_fft.cu | 42 +++++++++---------- lib/gauge_fix_ovr.cu | 43 +++++++++---------- lib/interface_quda.cpp | 63 ++++++++++------------------ tests/gauge_alg_test.cpp | 91 +++++++++++++++++++++++----------------- 7 files changed, 123 insertions(+), 151 deletions(-) diff --git a/include/gauge_tools.h b/include/gauge_tools.h index e067f1b7f2..b53134ecfc 100644 --- a/include/gauge_tools.h +++ b/include/gauge_tools.h @@ -113,37 +113,16 @@ namespace quda /** * @brief Gauge fixing with overrelaxation with support for single and multi GPU. * @param[in,out] data, quda gauge field - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] steps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
- * @param[in] tolerance, torelance value to stop the method, if this - * value is zero then the method stops when iteration reachs the - * maximum number of steps defined by steps - * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value + * @param[in] fix_param Parameter struct that defines the gauge fixing */ - void gaugeFixingOVR(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval, - const double relax_boost, const double tolerance, const int reunit_interval, - const QudaBoolean theta_condition); - + void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param); + /** * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. * @param[in,out] data, quda gauge field - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] steps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune, QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we - * decrease the alpha value - * @param[in] tolerance, torelance value to stop the method, if this - * value is zero then the method stops when iteration reachs the - * maximum number of steps defined by steps - * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value + * @param[in] fix_param Parameter struct that defines the gauge fixing */ - void gaugeFixingFFT(GaugeField &data, const int gauge_dir, const int steps, const int verbose_interval, - const double alpha, const QudaBoolean autotune, const double tolerance, - const QudaBoolean theta_condition); + void 
gaugeFixingFFT(GaugeField &data, QudaGaugeFixParam &fix_param); /** @brief Compute the Fmunu tensor diff --git a/include/quda.h b/include/quda.h index 0b7d633ec9..02b8601809 100644 --- a/include/quda.h +++ b/include/quda.h @@ -777,7 +777,7 @@ extern "C" { QudaBLASDataType data_type; /**< Specifies if using S(C) or D(Z) BLAS type */ QudaBLASDataOrder data_order; /**< Specifies if using Row or Column major */ } QudaBLASParam; - + typedef struct QudaGaugeFixParam_s { size_t struct_size; /**< Size of this struct in bytes. Used to ensure that the host application and QUDA see the same struct size */ diff --git a/lib/check_params.h b/lib/check_params.h index 0dbb87d96d..0f2982f8eb 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -268,7 +268,7 @@ void printQudaCloverParam(QudaInvertParam *param) #if defined CHECK_PARAM if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param)) - errorQuda("Unexpected QudaInvertParam struct size %lu, expected %lu", param->struct_size, sizeof(*param)); + errorQuda("Unexpected QudaCloverParam struct size %lu, expected %lu", param->struct_size, sizeof(*param)); #else P(struct_size, (size_t)INVALID_INT); #endif diff --git a/lib/gauge_fix_fft.cu b/lib/gauge_fix_fft.cu index 74e0849d24..b9aec49975 100644 --- a/lib/gauge_fix_fft.cu +++ b/lib/gauge_fix_fft.cu @@ -182,11 +182,17 @@ namespace quda { }; template - void gaugeFixingFFT(GaugeField& data, int steps, int verbose_interval, - double alpha0, QudaBoolean autotune, double tolerance, QudaBoolean theta_condition) + void gaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param) { TimeProfile profileInternalGaugeFixFFT("InternalGaugeFixQudaFFT", false); - + + QudaBoolean autotune = fix_param.fft_autotune; + double alpha0 = fix_param.fft_alpha; + double tolerance = fix_param.tolerance; + QudaBoolean theta_condition = fix_param.theta_condition; + int steps = fix_param.maxiter; + int verbose_interval = fix_param.verbosity_interval; + 
profileInternalGaugeFixFFT.TPSTART(QUDA_PROFILE_COMPUTE); if (getVerbosity() >= QUDA_SUMMARIZE) { @@ -364,15 +370,16 @@ namespace quda { } template struct GaugeFixingFFT { - GaugeFixingFFT(GaugeField& data, int gauge_dir, int steps, int verbose_interval, - double alpha, QudaBoolean autotune, double tolerance, QudaBoolean theta_condition) + GaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param) { - if (gauge_dir != 3) { - if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs...\n"); - gaugeFixingFFT(data, steps, verbose_interval, alpha, autotune, tolerance, theta_condition); + if (fix_param.gauge_dir == 4) { + if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with FFTs\n"); + gaugeFixingFFT(data, fix_param); + } else if (fix_param.gauge_dir == 3) { + if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs\n"); + gaugeFixingFFT(data, fix_param); } else { - if (getVerbosity() > QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with FFTs...\n"); - gaugeFixingFFT(data, steps, verbose_interval, alpha, autotune, tolerance, theta_condition); + errorQuda("Unexpected gauge_dir = %d", fix_param.gauge_dir); } } }; @@ -380,23 +387,16 @@ namespace quda { /** * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. * @param[in,out] data, quda gauge field - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] steps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] alpha, gauge fixing parameter of the method, most common value is 0.08 - * @param[in] autotune QUDA_BOOLEAN_TRUE to autotune the method, i.e., if the fix quality inverts its tendency we decrease the alpha value. 
- * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by steps - * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value + * @param[in] fix_param Parameter struct defining the gauge fixing */ #if defined(GPU_GAUGE_ALG) - void gaugeFixingFFT(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, const double alpha, - const QudaBoolean autotune, const double tolerance, const QudaBoolean theta_condition) + void gaugeFixingFFT(GaugeField& data, QudaGaugeFixParam &fix_param) { if (comm_partitioned()) errorQuda("Gauge Fixing with FFTs in multi-GPU support NOT implemented yet!"); - instantiate(data, gauge_dir, steps, verbose_interval, alpha, autotune, tolerance, theta_condition); + instantiate(data, fix_param); } #else - void gaugeFixingFFT(GaugeField&, const int, const int, const int, const double, const QudaBoolean, const double, const QudaBoolean) + void gaugeFixingFFT(GaugeField&, QudaGaugeFixParam &) { errorQuda("Gauge fixing has bot been built"); } diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu index e56e5e05c5..aff0df2fef 100644 --- a/lib/gauge_fix_ovr.cu +++ b/lib/gauge_fix_ovr.cu @@ -223,12 +223,17 @@ namespace quda { }; template - void gaugeFixingOVR(GaugeField &data, const int steps, const int verbose_interval, - const double relax_boost, const double tolerance, - const int reunit_interval, const QudaBoolean theta_condition) + void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param) { TimeProfile profileInternalGaugeFixOVR("InternalGaugeFixQudaOVR", false); + double relax_boost = fix_param.ovr_relaxation_boost; + double tolerance = fix_param.tolerance; + QudaBoolean theta_condition = fix_param.theta_condition; + int steps = fix_param.maxiter; + int reunit_interval = fix_param.reunit_interval; + int verbose_interval = fix_param.verbosity_interval; + 
profileInternalGaugeFixOVR.TPSTART(QUDA_PROFILE_COMPUTE); double flop = 0; double byte = 0; @@ -470,17 +475,16 @@ namespace quda { } template struct GaugeFixingOVR { - GaugeFixingOVR(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, - const double relax_boost, const double tolerance, const int reunit_interval, const QudaBoolean theta_condition) + GaugeFixingOVR(GaugeField& data, QudaGaugeFixParam &fix_param) { - if (gauge_dir == 4) { - if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing...\n"); - gaugeFixingOVR(data, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition); - } else if (gauge_dir == 3) { - if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing...\n"); - gaugeFixingOVR(data, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition); + if (fix_param.gauge_dir == 4) { + if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Landau gauge fixing with Overrelaxation\n"); + gaugeFixingOVR(data, fix_param); + } else if (fix_param.gauge_dir == 3) { + if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Starting Coulomb gauge fixing with Overrelaxation\n"); + gaugeFixingOVR(data, fix_param); } else { - errorQuda("Unexpected gauge_dir = %d", gauge_dir); + errorQuda("Unexpected gauge_dir = %d", fix_param.gauge_dir); } } }; @@ -488,22 +492,15 @@ namespace quda { /** * @brief Gauge fixing with overrelaxation with support for single and multi GPU. * @param[in,out] data, quda gauge field - * @param[in] gauge_dir, 3 for Coulomb gauge fixing, other for Landau gauge fixing - * @param[in] steps, maximum number of steps to perform gauge fixing - * @param[in] verbose_interval, print gauge fixing info when iteration count is a multiple of this - * @param[in] relax_boost, gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. 
- * @param[in] tolerance, torelance value to stop the method, if this value is zero then the method stops when iteration reachs the maximum number of steps defined by steps - * @param[in] reunit_interval, reunitarize gauge field when iteration count is a multiple of this - * @param[in] theta_condition, QUDA_BOOLEAN_FALSE for MILC criterion and QUDA_BOOLEAN_TRUE to use the theta value + * @param[in] fix_param Parameter struct defining the gauge fixing */ #ifdef GPU_GAUGE_ALG - void gaugeFixingOVR(GaugeField& data, const int gauge_dir, const int steps, const int verbose_interval, const double relax_boost, - const double tolerance, const int reunit_interval, const QudaBoolean theta_condition) + void gaugeFixingOVR(GaugeField& data, QudaGaugeFixParam &fix_param) { - instantiate(data, gauge_dir, steps, verbose_interval, relax_boost, tolerance, reunit_interval, theta_condition); + instantiate(data, fix_param); } #else - void gaugeFixingOVR(GaugeField&, const int, const int, const int, const double, const double, const int, const QudaBoolean) + void gaugeFixingOVR(GaugeField&, QudaGaugeFixParam &) { errorQuda("Gauge fixing has not been built"); } diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index dce0f6ef06..355a29cf89 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5531,39 +5531,25 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar checkGaugeParam(g_param); checkGaugeFixParam(fix_param); - cudaGaugeField *device_gauge = nullptr; - cpuGaugeField *cpu_gauge = nullptr; + profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); + GaugeFieldParam gauge_param(*g_param, gauge); + //printfQuda("CPU start\n"); + //auto *cpu_gauge = new cpuGaugeField(gauge_param); + //printfQuda("CPU done\n"); + gauge_param.create = QUDA_REFERENCE_FIELD_CREATE; + gauge_param.link_type = g_param->type; + gauge_param.reconstruct = g_param->reconstruct; + gauge_param.setPrecision(gauge_param.Precision(), true); + auto *device_gauge = new 
cudaGaugeField(gauge_param); + printfQuda("GPU done\n"); + profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); - // Create host and device fields - if(g_param->location == QUDA_CPU_FIELD_LOCATION) { - // The gauge field is on the CPU. We must - // create a GPU gauge and transfer. - profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); - GaugeFieldParam gauge_param(*g_param, gauge); - cpu_gauge = new cpuGaugeField(gauge_param); - gauge_param.create = QUDA_NULL_FIELD_CREATE; - gauge_param.link_type = g_param->type; - gauge_param.reconstruct = g_param->reconstruct; - gauge_param.setPrecision(gauge_param.Precision(), true); - device_gauge = new cudaGaugeField(gauge_param); - profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); - - // Load gauge to device - profileGaugeFix.TPSTART(QUDA_PROFILE_H2D); - device_gauge->loadCPUField(*cpu_gauge); - profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D); - } else { - // The gauge field is on the GPU already, so - // we can just reference that field. - profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); - GaugeFieldParam gauge_param(*g_param, gauge); - gauge_param.create = QUDA_REFERENCE_FIELD_CREATE; - gauge_param.link_type = g_param->type; - gauge_param.reconstruct = g_param->reconstruct; - gauge_param.setPrecision(gauge_param.Precision(), true); - device_gauge = new cudaGaugeField(gauge_param); - profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); - } + // Load gauge to device + profileGaugeFix.TPSTART(QUDA_PROFILE_H2D); + //device_gauge->loadCPUField(*cpu_gauge); + profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D); + + printfQuda("Perform update\n"); // Perform the update switch (fix_param->fix_type) { @@ -5571,17 +5557,13 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar case QUDA_GAUGEFIX_TYPE_OVR: if (comm_size() == 1) { profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*device_gauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, - fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, - 
fix_param->theta_condition); + gaugeFixingOVR(*device_gauge, *fix_param); profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); } else { // For MPI, we must perform a halo exchange cudaGaugeField *device_gauge_extended = createExtendedGauge(*device_gauge, R, profileGaugeFix); profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingOVR(*device_gauge_extended, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, - fix_param->ovr_relaxation_boost, fix_param->tolerance, fix_param->reunit_interval, - fix_param->theta_condition); + gaugeFixingOVR(*device_gauge_extended, *fix_param); profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); copyExtendedGauge(*device_gauge, *device_gauge_extended, QUDA_CUDA_FIELD_LOCATION); } @@ -5589,8 +5571,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar case QUDA_GAUGEFIX_TYPE_FFT: profileGaugeFix.TPSTART(QUDA_PROFILE_COMPUTE); - gaugeFixingFFT(*device_gauge, fix_param->gauge_dir, fix_param->maxiter, fix_param->verbosity_interval, - fix_param->fft_alpha, fix_param->fft_autotune, fix_param->tolerance, fix_param->theta_condition); + gaugeFixingFFT(*device_gauge, *fix_param); profileGaugeFix.TPSTOP(QUDA_PROFILE_COMPUTE); break; @@ -5601,7 +5582,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar // from the host if(g_param->location == QUDA_CPU_FIELD_LOCATION) { profileGaugeFix.TPSTART(QUDA_PROFILE_D2H); - device_gauge->saveCPUField(*cpu_gauge); + //device_gauge->saveCPUField(*cpu_gauge); profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H); } diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index a0a068d8a9..2f467f7b0d 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -11,7 +11,6 @@ #include #include #include - #include #include @@ -39,17 +38,28 @@ bool gauge_load; bool gauge_store; void *host_gauge[4]; +// Define the command line options and option group for this test +int gf_gauge_dir = 4; +int gf_maxiter = 10000; +int 
gf_verbosity_interval = 100; +double gf_ovr_relaxation_boost = 1.5; +double gf_fft_alpha = 0.8; +bool gf_fft_autotune = true; +int gf_reunit_interval = 10; +double gf_tolerance = 1e-6; +bool gf_theta_condition = false; +QudaGaugeFixType gf_fix_type = QUDA_GAUGEFIX_TYPE_OVR; + void display_test_info() { printfQuda("running the following test:\n"); switch (test_type) { case 0: printfQuda("\n Google testing\n"); break; - case 1: printfQuda("\nOVR gauge fix\n"); break; - case 2: printfQuda("\nFFT gauge fix\n"); break; + case 1: printfQuda("\n%s %s gauge fix\n", get_gaugefix_str(gf_fix_type), gf_gauge_dir == 4 ? "Landau" : "Coulomb"); break; default: errorQuda("Undefined test type %d given", test_type); } - + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, @@ -60,18 +70,6 @@ void display_test_info() dimPartitioned(3)); } -// Define the command line options and option group for this test -int gf_gauge_dir = 4; -int gf_maxiter = 10000; -int gf_verbosity_interval = 100; -double gf_ovr_relaxation_boost = 1.5; -double gf_fft_alpha = 0.8; -bool gf_fft_autotune = true; -int gf_reunit_interval = 10; -double gf_tolerance = 1e-6; -bool gf_theta_condition = false; -QudaGaugeFixType gf_fix_type = QUDA_GAUGEFIX_TYPE_OVR; - void add_gaugefix_option_group(std::shared_ptr quda_app) { CLI::TransformPairs fix_type_map {{"ovr", QUDA_GAUGEFIX_TYPE_OVR}, {"fft", QUDA_GAUGEFIX_TYPE_FFT}}; @@ -370,8 +368,14 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) { if (execute) { printfQuda("Landau gauge fixing with overrelaxation\n"); - //gaugeFixingOVR(*U, 4, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - //gf_theta_condition); + // Set gauge fixing params from the command line + // and adjust for this test type + fix_param = 
newQudaGaugeFixParam(); + setGaugeFixParam(fix_param); + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; + fix_param.gauge_dir = 4; + + gaugeFixingOVR(*U, fix_param); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); @@ -384,8 +388,14 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { if (execute) { printfQuda("Coulomb gauge fixing with overrelaxation\n"); - //gaugeFixingOVR(*U, 3, gf_maxiter, gf_verbosity_interval, gf_ovr_relaxation_boost, gf_tolerance, gf_reunit_interval, - //gf_theta_condition); + // Use gauge fixing params from the command line + // and adjust for this test type + fix_param = newQudaGaugeFixParam(); + setGaugeFixParam(fix_param); + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; + fix_param.gauge_dir = 3; + + gaugeFixingOVR(*U, fix_param); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); @@ -399,8 +409,14 @@ TEST_F(GaugeAlgTest, Landau_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFT\n"); - //gaugeFixingFFT(*U, 4, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - //gf_theta_condition); + // Set gauge fixing params from the command line + // and adjust for this test type + fix_param = newQudaGaugeFixParam(); + setGaugeFixParam(fix_param); + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT; + fix_param.gauge_dir = 4; + + gaugeFixingFFT(*U, fix_param); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); @@ -415,8 +431,14 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n"); - 
//gaugeFixingFFT(*U, 3, gf_maxiter, gf_verbosity_interval, gf_fft_alpha, gf_fft_autotune, gf_tolerance, - //gf_theta_condition); + // Set gauge fixing params from the command line + // and adjust for this test type + fix_param = newQudaGaugeFixParam(); + setGaugeFixParam(fix_param); + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT; + fix_param.gauge_dir = 3; + + gaugeFixingFFT(*U, fix_param); auto plaq_gf = plaquette(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); @@ -439,7 +461,6 @@ int main(int argc, char **argv) test_type = 0; CLI::TransformPairs test_type_map {{"Google", 0}, {"OVR", 1}, {"FFT", 2}}; app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map)); - try { app->parse(argc, argv); } catch (const CLI::ParseError &e) { @@ -448,23 +469,23 @@ int main(int argc, char **argv) // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); - QudaGaugeParam gauge_param = newQudaGaugeParam(); setVerbosity(QUDA_VERBOSE); setQudaPrecisions(); setWilsonGaugeParam(gauge_param); setDims(gauge_param.X); - - //if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; - //if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - + // call srand() with a rank-dependent seed + initRand(); + // initialize the QUDA library + initQuda(device_ordinal); + display_test_info(); + // If we are passing a gauge field to the test, we must allocate host memory. + // If no gauge is passed, we generate a quenched field on the device. gauge_load = strcmp(latfile, ""); gauge_store = strcmp(gauge_outfile, ""); - // If we are passing a gauge field to the test, we must allocate host memory. - // If no gauge is passed, we generate a quenched field on the device. 
if (gauge_load) { printfQuda("Loading gauge field from host\n"); for (int dir = 0; dir < 4; dir++) { @@ -473,12 +494,6 @@ int main(int argc, char **argv) constructHostGaugeField(host_gauge, gauge_param, argc, argv); } - // call srand() with a rank-dependent seed - initRand(); - - // initialize the QUDA library - initQuda(device_ordinal); - // initalize google test, includes command line options ::testing::InitGoogleTest(&argc, argv); From 84b675d67073c57cf58165596c2aa810f374d146 Mon Sep 17 00:00:00 2001 From: cpviolator Date: Tue, 21 Dec 2021 18:20:57 -0800 Subject: [PATCH 27/32] Restrict interface to CPU gauge fields, enforces GPU kernel call for GPU fields --- lib/interface_quda.cpp | 41 ++++++------ tests/gauge_alg_test.cpp | 140 +++++++++++++++++++++++---------------- 2 files changed, 105 insertions(+), 76 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 355a29cf89..d92b1da7b6 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5524,32 +5524,35 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar profileGaugeFix.TPSTART(QUDA_PROFILE_TOTAL); if (!initialized) errorQuda("QUDA not initialized"); - printQudaGaugeParam(g_param); - printQudaGaugeFixParam(fix_param); + if (getVerbosity() == QUDA_DEBUG_VERBOSE) { + printQudaGaugeParam(g_param); + printQudaGaugeFixParam(fix_param); + } // Check parameters checkGaugeParam(g_param); checkGaugeFixParam(fix_param); + if(g_param->location == QUDA_CUDA_FIELD_LOCATION) { + errorQuda("GPU gauge fixing not supported via QUDA interface. 
Please use direct kernel call: gaugeFixingOVR/gaugeFixingFFT"); + } + profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); GaugeFieldParam gauge_param(*g_param, gauge); - //printfQuda("CPU start\n"); - //auto *cpu_gauge = new cpuGaugeField(gauge_param); - //printfQuda("CPU done\n"); - gauge_param.create = QUDA_REFERENCE_FIELD_CREATE; + auto *cpu_gauge = new cpuGaugeField(gauge_param); + + // Make GPU field + gauge_param.create = QUDA_NULL_FIELD_CREATE; gauge_param.link_type = g_param->type; gauge_param.reconstruct = g_param->reconstruct; gauge_param.setPrecision(gauge_param.Precision(), true); - auto *device_gauge = new cudaGaugeField(gauge_param); - printfQuda("GPU done\n"); + auto *device_gauge = new cudaGaugeField(gauge_param); profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); - + // Load gauge to device profileGaugeFix.TPSTART(QUDA_PROFILE_H2D); - //device_gauge->loadCPUField(*cpu_gauge); + device_gauge->loadCPUField(*cpu_gauge); profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D); - - printfQuda("Perform update\n"); // Perform the update switch (fix_param->fix_type) { @@ -5578,14 +5581,11 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar default: errorQuda("Unkown gauge fix type %d", fix_param->fix_type); } - // Copy the fixed gauge field back to the host if it came - // from the host - if(g_param->location == QUDA_CPU_FIELD_LOCATION) { - profileGaugeFix.TPSTART(QUDA_PROFILE_D2H); - //device_gauge->saveCPUField(*cpu_gauge); - profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H); - } - + // Copy the fixed gauge field back to the host. 
+ profileGaugeFix.TPSTART(QUDA_PROFILE_D2H); + device_gauge->saveCPUField(*cpu_gauge); + profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H); + profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL); if (g_param->make_resident_gauge) { @@ -5594,6 +5594,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar } else { delete device_gauge; } + delete cpu_gauge; if(timeinfo){ timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D); diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index 2f467f7b0d..7648e64321 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -118,8 +118,9 @@ class GaugeAlgTest : public ::testing::Test QudaGaugeFixParam fix_param; host_timer_t host_timer_1, host_timer_2; - double2 detu; - double3 plaq; + double2 det_u; + double2 trace_u; + double3 plaq_u; cudaGaugeField *U; int nsteps; int nhbsteps; @@ -215,8 +216,8 @@ class GaugeAlgTest : public ::testing::Test unitarizeLinks(*U, num_failures_d); qudaDeviceSynchronize(); if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h); - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); + plaq_u = plaquette(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z); } host_timer_2.stop(); @@ -256,19 +257,23 @@ class GaugeAlgTest : public ::testing::Test unitarizeLinks(*U, num_failures_d); qudaDeviceSynchronize(); if (*num_failures_h > 0) errorQuda("Error in the unitarization (%d errors)", *num_failures_h); - - plaq = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); } - // If a specific test type is requested, perfrom it now and then + // Unfixed Gauge data + plaq_u = plaquette(*U); + det_u = getLinkDeterminant(*U); + trace_u = getLinkTrace(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z); + printfQuda("Det: %.16e:%.16e\n", det_u.x, det_u.y); + printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0); 
+ + // If a specific test type is requested, perform it now and then // turn off all Google tests in the tear down. switch (test_type) { - case 0: - // Do the Google testing - break; - case 1: run(); break; - //case 2: run_fft(); break; + case 0: // Do the Google testing + break; + case 1: // Do a specific test + run(); break; default: errorQuda("Invalid test type %d ", test_type); } @@ -279,21 +284,37 @@ class GaugeAlgTest : public ::testing::Test virtual void TearDown() { if (execute) { - detu = getLinkDeterminant(*U); - double2 tru = getLinkTrace(*U); - printfQuda("Det: %.16e:%.16e\n", detu.x, detu.y); - printfQuda("Tr: %.16e:%.16e\n", tru.x / 3.0, tru.y / 3.0); + + // Compare gauge fixed data with original data + auto plaq_gf = plaquette(*U); + auto det_gf = getLinkDeterminant(*U); + auto trace_gf = getLinkTrace(*U); + printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z); + printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); + printfQuda("Det: %.16e, %.16e\n", det_u.x, det_u.y); + printfQuda("Det GF: %.16e, %.16e\n", det_gf.x, det_gf.y); + printfQuda("Trace: %.16e, %.16e\n", trace_u.x / 3.0, trace_u.y / 3.0); + printfQuda("Trace GF: %.16e, %.16e\n", trace_gf.x / 3.0, trace_gf.y / 3.0); + + // As an observable, the plaquette value must remain invariant after + // gauge fixing. + ASSERT_TRUE(comparePlaquette(plaq_u, plaq_gf)); + + // The determinant of any SU(N) gauge field element must be (1.0,0.0) to + // machine precision + ASSERT_TRUE(CheckDeterminant(det_gf)); delete U; // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); - + host_timer_1.stop(); printfQuda("Time -> %.6f s\n", host_timer_1.last()); } // If we performed a specific instance, switch off the // Google testing. 
if (test_type != 0) execute = false; + saveTuneCache(); } virtual void run() @@ -303,22 +324,48 @@ class GaugeAlgTest : public ::testing::Test fix_param = newQudaGaugeFixParam(); setGaugeFixParam(fix_param); - // Setup gauge container. + printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", get_gaugefix_str(fix_param.fix_type)); + + // Setup CPU gauge container. gauge_param = newQudaGaugeParam(); setWilsonGaugeParam(gauge_param); gauge_param.t_boundary = QUDA_PERIODIC_T; - gauge_param.location = QUDA_CUDA_FIELD_LOCATION; + gauge_param.location = QUDA_CPU_FIELD_LOCATION; - //GaugeFieldParam param(*U); - printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", get_gaugefix_str(fix_param.fix_type)); + void *cpu_gauge[4]; + for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); } - computeGaugeFixingQuda(U->Gauge_p(), &gauge_param, &fix_param, nullptr); + GaugeFieldParam param(gauge_param); + param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + param.create = QUDA_NULL_FIELD_CREATE; + param.link_type = gauge_param.type; + param.reconstruct = gauge_param.reconstruct; + param.setPrecision(param.Precision(), true); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); + auto *gauge = new cudaGaugeField(param); + + // Copy the target U field (extended) into regular GPU field, then + // save to a CPU field. This is done to test the CPU interface function + // and instructs the user how to use void pointers for the gauge data, + // and the gauge_param container. 
+ copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); + saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); + delete gauge; + + // Compute gauge fixing via interface + computeGaugeFixingQuda(cpu_gauge, &gauge_param, &fix_param, nullptr); + + // cpu_gauge now contains the fixed gauge on the CPU. We now load that gauge + // to the device for inspection in the TearDown. + GaugeFieldParam fixed_param(gauge_param, cpu_gauge); + auto *fixed_cpu_gauge = new cpuGaugeField(fixed_param); + + // Copy the CPU field to U. + U->loadCPUField(*fixed_cpu_gauge); + + for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]); + delete fixed_cpu_gauge; + // Save if output string is specified if (gauge_store) save_gauge(); } @@ -328,9 +375,6 @@ class GaugeAlgTest : public ::testing::Test { printfQuda("Saving the gauge field to file %s\n", gauge_outfile); - //QudaGaugeParam gauge_param = newQudaGaugeParam(); - //setWilsonGaugeParam(gauge_param); - void *cpu_gauge[4]; for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); } @@ -359,8 +403,11 @@ class GaugeAlgTest : public ::testing::Test TEST_F(GaugeAlgTest, Generation) { if (execute && !gauge_load) { - detu = getLinkDeterminant(*U); - ASSERT_TRUE(CheckDeterminant(detu)); + det_u = getLinkDeterminant(*U); + //trace_u = getLinkTrace(*U); + //printfQuda("Det: %.16e:%.16e\n", det_u.x, det_u.y); + //printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0); + ASSERT_TRUE(CheckDeterminant(det_u)); } } @@ -375,12 +422,7 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; fix_param.gauge_dir = 4; - gaugeFixingOVR(*U, fix_param); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); + gaugeFixingOVR(*U, fix_param); } } @@ -395,12 
+437,7 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; fix_param.gauge_dir = 3; - gaugeFixingOVR(*U, fix_param); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); + gaugeFixingOVR(*U, fix_param); } } @@ -417,11 +454,6 @@ TEST_F(GaugeAlgTest, Landau_FFT) fix_param.gauge_dir = 4; gaugeFixingFFT(*U, fix_param); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); } } } @@ -439,11 +471,6 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) fix_param.gauge_dir = 3; gaugeFixingFFT(*U, fix_param); - auto plaq_gf = plaquette(*U); - printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq.x, plaq.y, plaq.z); - printfQuda("Plaq GF: %.16e, %.16e, %.16e\n", plaq_gf.x, plaq_gf.y, plaq_gf.z); - ASSERT_TRUE(comparePlaquette(plaq, plaq_gf)); - saveTuneCache(); } } } @@ -470,10 +497,11 @@ int main(int argc, char **argv) // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) initComms(argc, argv, gridsize_from_cmdline); QudaGaugeParam gauge_param = newQudaGaugeParam(); - setVerbosity(QUDA_VERBOSE); + setVerbosity(verbosity); setQudaPrecisions(); setWilsonGaugeParam(gauge_param); setDims(gauge_param.X); + // call srand() with a rank-dependent seed initRand(); // initialize the QUDA library From 0e020b970cf000910e1f251d80ae053c04d6ee7f Mon Sep 17 00:00:00 2001 From: cpviolator Date: Tue, 21 Dec 2021 18:55:37 -0800 Subject: [PATCH 28/32] Set gauge fixing parameters once only, use a CL supplied seed (updated other tests to use a CL supplied seed) --- tests/gauge_alg_test.cpp | 31 ++++++++++------------------- tests/heatbath_test.cpp | 2 +- tests/invert_test.cpp | 2 +- 
tests/multigrid_evolve_test.cpp | 2 +- tests/staggered_invert_test.cpp | 2 +- tests/utils/command_line_params.cpp | 6 ++++++ tests/utils/command_line_params.h | 1 + tests/utils/host_utils.cpp | 2 +- 8 files changed, 23 insertions(+), 25 deletions(-) diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index 7648e64321..91e9068e58 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -195,7 +195,7 @@ class GaugeAlgTest : public ::testing::Test U = new cudaGaugeField(device_gauge_param); - RNG randstates(*U, 1234); + RNG randstates(*U, quda_seed); nsteps = heatbath_num_steps; nhbsteps = heatbath_num_heatbath_per_step; @@ -271,6 +271,10 @@ class GaugeAlgTest : public ::testing::Test // turn off all Google tests in the tear down. switch (test_type) { case 0: // Do the Google testing + // Set gauge fixing params from the command line + // and adjust for this test type + fix_param = newQudaGaugeFixParam(); + setGaugeFixParam(fix_param); break; case 1: // Do a specific test run(); break; @@ -403,10 +407,9 @@ class GaugeAlgTest : public ::testing::Test TEST_F(GaugeAlgTest, Generation) { if (execute && !gauge_load) { + // Assert that the generated gauge is + // on the SU(N) manifold det_u = getLinkDeterminant(*U); - //trace_u = getLinkTrace(*U); - //printfQuda("Det: %.16e:%.16e\n", det_u.x, det_u.y); - //printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0); ASSERT_TRUE(CheckDeterminant(det_u)); } } @@ -415,10 +418,7 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) { if (execute) { printfQuda("Landau gauge fixing with overrelaxation\n"); - // Set gauge fixing params from the command line - // and adjust for this test type - fix_param = newQudaGaugeFixParam(); - setGaugeFixParam(fix_param); + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; fix_param.gauge_dir = 4; @@ -430,10 +430,7 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { if (execute) { printfQuda("Coulomb gauge fixing with overrelaxation\n"); - // Use gauge fixing params from 
the command line - // and adjust for this test type - fix_param = newQudaGaugeFixParam(); - setGaugeFixParam(fix_param); + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; fix_param.gauge_dir = 3; @@ -446,10 +443,7 @@ TEST_F(GaugeAlgTest, Landau_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFT\n"); - // Set gauge fixing params from the command line - // and adjust for this test type - fix_param = newQudaGaugeFixParam(); - setGaugeFixParam(fix_param); + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT; fix_param.gauge_dir = 4; @@ -463,10 +457,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n"); - // Set gauge fixing params from the command line - // and adjust for this test type - fix_param = newQudaGaugeFixParam(); - setGaugeFixParam(fix_param); + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT; fix_param.gauge_dir = 3; diff --git a/tests/heatbath_test.cpp b/tests/heatbath_test.cpp index 033ff597df..300f0b1012 100644 --- a/tests/heatbath_test.cpp +++ b/tests/heatbath_test.cpp @@ -125,7 +125,7 @@ int main(int argc, char **argv) for(int dir=0; dir<4; ++dir) gParamEx.r[dir] = R[dir]; cudaGaugeField *gaugeEx = new cudaGaugeField(gParamEx); // CURAND random generator initialization - RNG *randstates = new RNG(*gauge, 1234); + RNG *randstates = new RNG(*gauge, quda_seed); int nsteps = heatbath_num_steps; int nwarm = heatbath_warmup_steps; diff --git a/tests/invert_test.cpp b/tests/invert_test.cpp index e559f04520..1ba40618c9 100644 --- a/tests/invert_test.cpp +++ b/tests/invert_test.cpp @@ -304,7 +304,7 @@ int main(int argc, char **argv) std::vector gflops(Nsrc); std::vector iter(Nsrc); - auto *rng = new quda::RNG(*check, 1234); + auto *rng = new quda::RNG(*check, quda_seed); for (int i = 0; i < Nsrc; i++) { // Populate the host spinor with random numbers. 
diff --git a/tests/multigrid_evolve_test.cpp b/tests/multigrid_evolve_test.cpp index de6e3f22d9..fb63443ae8 100644 --- a/tests/multigrid_evolve_test.cpp +++ b/tests/multigrid_evolve_test.cpp @@ -244,7 +244,7 @@ int main(int argc, char **argv) obs_param.compute_qcharge = QUDA_BOOLEAN_TRUE; // CURAND random generator initialization - RNG *randstates = new RNG(*gauge, 1234); + RNG *randstates = new RNG(*gauge, quda_seed); int nsteps = 10; int nhbsteps = 1; int novrsteps = 1; diff --git a/tests/staggered_invert_test.cpp b/tests/staggered_invert_test.cpp index b2f4e96588..a0a39c7adb 100644 --- a/tests/staggered_invert_test.cpp +++ b/tests/staggered_invert_test.cpp @@ -299,7 +299,7 @@ int main(int argc, char **argv) //----------------------------------------------------------------------------------- // Prepare rng - auto *rng = new quda::RNG(*ref, 1234); + auto *rng = new quda::RNG(*ref, quda_seed); // Performance measuring std::vector time(Nsrc); diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp index 5dd4ab8665..1201590521 100644 --- a/tests/utils/command_line_params.cpp +++ b/tests/utils/command_line_params.cpp @@ -9,6 +9,8 @@ int device_ordinal = -1; int device_ordinal = 0; #endif +int quda_seed = 1234; + int rank_order; std::array gridsize_from_cmdline = {1, 1, 1, 1}; auto &grid_x = gridsize_from_cmdline[0]; @@ -501,6 +503,10 @@ std::shared_ptr make_app(std::string app_description, std::string app_n quda_app->add_option("--save-gauge", gauge_outfile, "Save gauge field \" file \" for the test (requires QIO, heatbath test only)"); + quda_app->add_option("--seed", quda_seed, + "Seed value for use in test suite (default 1234)") + ->check(CLI::PositiveNumber); + quda_app->add_option("--solution-pipeline", solution_accumulator_pipeline, "The pipeline length for fused solution accumulation (default 0, no pipelining)"); diff --git a/tests/utils/command_line_params.h b/tests/utils/command_line_params.h index 10c8d775f2..77ac51f4cf 100644 
--- a/tests/utils/command_line_params.h +++ b/tests/utils/command_line_params.h @@ -151,6 +151,7 @@ template std::string inline get_string(CLI::TransformPairs &map, // } // parameters +extern int quda_seed; extern int device_ordinal; extern int rank_order; extern bool native_blas_lapack; diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index c961c85e0e..ff3d60ced5 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -320,7 +320,7 @@ void initRand() MPI_Comm_rank(MPI_COMM_WORLD, &rank); #endif - srand(17 * rank + 137); + srand(17 * rank + 137 + quda_seed); } void setDims(int *X) From 56725151650cad3ac0047bf76be8e84b0642d0ed Mon Sep 17 00:00:00 2001 From: cpviolator Date: Tue, 21 Dec 2021 19:02:13 -0800 Subject: [PATCH 29/32] Develop merge, clang tidy --- include/gauge_tools.h | 2 +- include/quda.h | 2 +- lib/check_params.h | 2 +- lib/interface_quda.cpp | 21 +++---- tests/gauge_alg_test.cpp | 85 ++++++++++++++++------------- tests/utils/command_line_params.cpp | 6 +- tests/utils/misc.cpp | 2 +- 7 files changed, 63 insertions(+), 57 deletions(-) diff --git a/include/gauge_tools.h b/include/gauge_tools.h index b53134ecfc..e2393cf239 100644 --- a/include/gauge_tools.h +++ b/include/gauge_tools.h @@ -116,7 +116,7 @@ namespace quda * @param[in] fix_param Parameter struct that defines the gauge fixing */ void gaugeFixingOVR(GaugeField &data, QudaGaugeFixParam &fix_param); - + /** * @brief Gauge fixing with Steepest descent method with FFTs with support for single GPU only. * @param[in,out] data, quda gauge field diff --git a/include/quda.h b/include/quda.h index 02b8601809..0b7d633ec9 100644 --- a/include/quda.h +++ b/include/quda.h @@ -777,7 +777,7 @@ extern "C" { QudaBLASDataType data_type; /**< Specifies if using S(C) or D(Z) BLAS type */ QudaBLASDataOrder data_order; /**< Specifies if using Row or Column major */ } QudaBLASParam; - + typedef struct QudaGaugeFixParam_s { size_t struct_size; /**< Size of this struct in bytes. 
Used to ensure that the host application and QUDA see the same struct size */ diff --git a/lib/check_params.h b/lib/check_params.h index 0f2982f8eb..b49cf04cb0 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -1079,7 +1079,7 @@ void printQudaGaugeFixParam(QudaGaugeFixParam *param) P(fft_autotune, QUDA_BOOLEAN_FALSE); P(theta_condition, QUDA_BOOLEAN_FALSE); #endif - + #ifdef INIT_PARAM return ret; #endif diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index d92b1da7b6..28fe842090 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5528,15 +5528,16 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar printQudaGaugeParam(g_param); printQudaGaugeFixParam(fix_param); } - + // Check parameters checkGaugeParam(g_param); checkGaugeFixParam(fix_param); - if(g_param->location == QUDA_CUDA_FIELD_LOCATION) { - errorQuda("GPU gauge fixing not supported via QUDA interface. Please use direct kernel call: gaugeFixingOVR/gaugeFixingFFT"); + if (g_param->location == QUDA_CUDA_FIELD_LOCATION) { + errorQuda("GPU gauge fixing not supported via QUDA interface. 
Please use direct kernel call: " + "gaugeFixingOVR/gaugeFixingFFT"); } - + profileGaugeFix.TPSTART(QUDA_PROFILE_INIT); GaugeFieldParam gauge_param(*g_param, gauge); auto *cpu_gauge = new cpuGaugeField(gauge_param); @@ -5546,14 +5547,14 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar gauge_param.link_type = g_param->type; gauge_param.reconstruct = g_param->reconstruct; gauge_param.setPrecision(gauge_param.Precision(), true); - auto *device_gauge = new cudaGaugeField(gauge_param); + auto *device_gauge = new cudaGaugeField(gauge_param); profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); - + // Load gauge to device profileGaugeFix.TPSTART(QUDA_PROFILE_H2D); device_gauge->loadCPUField(*cpu_gauge); profileGaugeFix.TPSTOP(QUDA_PROFILE_H2D); - + // Perform the update switch (fix_param->fix_type) { @@ -5585,7 +5586,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar profileGaugeFix.TPSTART(QUDA_PROFILE_D2H); device_gauge->saveCPUField(*cpu_gauge); profileGaugeFix.TPSTOP(QUDA_PROFILE_D2H); - + profileGaugeFix.TPSTOP(QUDA_PROFILE_TOTAL); if (g_param->make_resident_gauge) { @@ -5595,8 +5596,8 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar delete device_gauge; } delete cpu_gauge; - - if(timeinfo){ + + if (timeinfo) { timeinfo[0] = profileGaugeFix.Last(QUDA_PROFILE_H2D); timeinfo[1] = profileGaugeFix.Last(QUDA_PROFILE_COMPUTE); timeinfo[2] = profileGaugeFix.Last(QUDA_PROFILE_D2H); diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index 91e9068e58..5506f968d7 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -56,10 +56,12 @@ void display_test_info() switch (test_type) { case 0: printfQuda("\n Google testing\n"); break; - case 1: printfQuda("\n%s %s gauge fix\n", get_gaugefix_str(gf_fix_type), gf_gauge_dir == 4 ? "Landau" : "Coulomb"); break; + case 1: + printfQuda("\n%s %s gauge fix\n", get_gaugefix_str(gf_fix_type), gf_gauge_dir == 4 ? 
"Landau" : "Coulomb"); + break; default: errorQuda("Undefined test type %d given", test_type); } - + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n"); printfQuda("%s %s %s %s %d/%d/%d %d %d\n", get_prec_str(prec), get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, @@ -97,7 +99,8 @@ void add_gaugefix_option_group(std::shared_ptr quda_app) ->transform(CLI::QUDACheckedTransformer(fix_type_map)); } -void setGaugeFixParam(QudaGaugeFixParam &fix_param) { +void setGaugeFixParam(QudaGaugeFixParam &fix_param) +{ fix_param.fix_type = gf_fix_type; fix_param.gauge_dir = gf_gauge_dir; fix_param.maxiter = gf_maxiter; @@ -168,7 +171,7 @@ class GaugeAlgTest : public ::testing::Test virtual void SetUp() { if (execute) { - + // Setup gauge container. gauge_param = newQudaGaugeParam(); setWilsonGaugeParam(gauge_param); @@ -183,7 +186,7 @@ class GaugeAlgTest : public ::testing::Test // If no field is loaded, create a physical quenched field on the device if (!gauge_load) { - GaugeFieldParam device_gauge_param(gauge_param); + GaugeFieldParam device_gauge_param(gauge_param); device_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED; device_gauge_param.create = QUDA_NULL_FIELD_CREATE; device_gauge_param.reconstruct = link_recon; @@ -203,10 +206,12 @@ class GaugeAlgTest : public ::testing::Test coldstart = heatbath_coldstart; beta_value = heatbath_beta_value; host_timer_2.start(); - - if (coldstart) InitGaugeField(*U); - else InitGaugeField(*U, randstates); - + + if (coldstart) + InitGaugeField(*U); + else + InitGaugeField(*U, randstates); + for (int step = 1; step <= nsteps; ++step) { printfQuda("Step %d\n", step); Monte(*U, randstates, beta_value, nhbsteps, novrsteps); @@ -265,19 +270,20 @@ class GaugeAlgTest : public ::testing::Test trace_u = getLinkTrace(*U); printfQuda("Plaq: %.16e, %.16e, %.16e\n", plaq_u.x, plaq_u.y, plaq_u.z); printfQuda("Det: %.16e:%.16e\n", det_u.x, 
det_u.y); - printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0); - + printfQuda("Tr: %.16e:%.16e\n", trace_u.x / 3.0, trace_u.y / 3.0); + // If a specific test type is requested, perform it now and then // turn off all Google tests in the tear down. switch (test_type) { case 0: // Do the Google testing - // Set gauge fixing params from the command line - // and adjust for this test type - fix_param = newQudaGaugeFixParam(); - setGaugeFixParam(fix_param); - break; + // Set gauge fixing params from the command line + // and adjust for this test type + fix_param = newQudaGaugeFixParam(); + setGaugeFixParam(fix_param); + break; case 1: // Do a specific test - run(); break; + run(); + break; default: errorQuda("Invalid test type %d ", test_type); } @@ -311,7 +317,7 @@ class GaugeAlgTest : public ::testing::Test delete U; // Release all temporary memory used for data exchange between GPUs in multi-GPU mode PGaugeExchangeFree(); - + host_timer_1.stop(); printfQuda("Time -> %.6f s\n", host_timer_1.last()); } @@ -327,27 +333,28 @@ class GaugeAlgTest : public ::testing::Test // Set gauge fixing params from the command line fix_param = newQudaGaugeFixParam(); setGaugeFixParam(fix_param); - - printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", get_gaugefix_str(fix_param.fix_type)); + + printfQuda("%s gauge fixing with %s method\n", fix_param.gauge_dir == 4 ? "Landau" : "Coulomb", + get_gaugefix_str(fix_param.fix_type)); // Setup CPU gauge container. 
gauge_param = newQudaGaugeParam(); setWilsonGaugeParam(gauge_param); gauge_param.t_boundary = QUDA_PERIODIC_T; gauge_param.location = QUDA_CPU_FIELD_LOCATION; - + void *cpu_gauge[4]; for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); } - + GaugeFieldParam param(gauge_param); param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; param.create = QUDA_NULL_FIELD_CREATE; param.link_type = gauge_param.type; param.reconstruct = gauge_param.reconstruct; param.setPrecision(param.Precision(), true); - + auto *gauge = new cudaGaugeField(param); - + // Copy the target U field (extended) into regular GPU field, then // save to a CPU field. This is done to test the CPU interface function // and instructs the user how to use void pointers for the gauge data, @@ -355,7 +362,7 @@ class GaugeAlgTest : public ::testing::Test copyExtendedGauge(*gauge, *U, QUDA_CUDA_FIELD_LOCATION); saveGaugeFieldQuda((void *)cpu_gauge, (void *)gauge, &gauge_param); delete gauge; - + // Compute gauge fixing via interface computeGaugeFixingQuda(cpu_gauge, &gauge_param, &fix_param, nullptr); @@ -363,18 +370,18 @@ class GaugeAlgTest : public ::testing::Test // to the device for inspection in the TearDown. GaugeFieldParam fixed_param(gauge_param, cpu_gauge); auto *fixed_cpu_gauge = new cpuGaugeField(fixed_param); - + // Copy the CPU field to U. 
- U->loadCPUField(*fixed_cpu_gauge); + U->loadCPUField(*fixed_cpu_gauge); for (int dir = 0; dir < 4; dir++) host_free(cpu_gauge[dir]); delete fixed_cpu_gauge; - + // Save if output string is specified if (gauge_store) save_gauge(); } } - + virtual void save_gauge() { printfQuda("Saving the gauge field to file %s\n", gauge_outfile); @@ -418,11 +425,11 @@ TEST_F(GaugeAlgTest, Landau_Overrelaxation) { if (execute) { printfQuda("Landau gauge fixing with overrelaxation\n"); - + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; fix_param.gauge_dir = 4; - - gaugeFixingOVR(*U, fix_param); + + gaugeFixingOVR(*U, fix_param); } } @@ -430,11 +437,11 @@ TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { if (execute) { printfQuda("Coulomb gauge fixing with overrelaxation\n"); - + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; fix_param.gauge_dir = 3; - gaugeFixingOVR(*U, fix_param); + gaugeFixingOVR(*U, fix_param); } } @@ -443,10 +450,10 @@ TEST_F(GaugeAlgTest, Landau_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Landau gauge fixing with steepest descent method with FFT\n"); - + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT; fix_param.gauge_dir = 4; - + gaugeFixingFFT(*U, fix_param); } } @@ -457,7 +464,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) if (execute) { if (!comm_partitioned()) { printfQuda("Coulomb gauge fixing with steepest descent method with FFT\n"); - + fix_param.fix_type = QUDA_GAUGEFIX_TYPE_FFT; fix_param.gauge_dir = 3; @@ -492,12 +499,12 @@ int main(int argc, char **argv) setQudaPrecisions(); setWilsonGaugeParam(gauge_param); setDims(gauge_param.X); - + // call srand() with a rank-dependent seed initRand(); // initialize the QUDA library initQuda(device_ordinal); - + display_test_info(); // If we are passing a gauge field to the test, we must allocate host memory. 
diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp index 1201590521..4a500bdab7 100644 --- a/tests/utils/command_line_params.cpp +++ b/tests/utils/command_line_params.cpp @@ -503,10 +503,8 @@ std::shared_ptr make_app(std::string app_description, std::string app_n quda_app->add_option("--save-gauge", gauge_outfile, "Save gauge field \" file \" for the test (requires QIO, heatbath test only)"); - quda_app->add_option("--seed", quda_seed, - "Seed value for use in test suite (default 1234)") - ->check(CLI::PositiveNumber); - + quda_app->add_option("--seed", quda_seed, "Seed value for use in test suite (default 1234)")->check(CLI::PositiveNumber); + quda_app->add_option("--solution-pipeline", solution_accumulator_pipeline, "The pipeline length for fused solution accumulation (default 0, no pipelining)"); diff --git a/tests/utils/misc.cpp b/tests/utils/misc.cpp index d9c0537bc3..714b75f862 100644 --- a/tests/utils/misc.cpp +++ b/tests/utils/misc.cpp @@ -160,7 +160,7 @@ const char *get_gaugefix_str(QudaGaugeFixType type) case QUDA_GAUGEFIX_TYPE_FFT: ret = "FFT"; break; default: ret = "unknown"; break; } - + return ret; } From 984c45a8208db3c5d78f6de0fd4df1a5663bc4ac Mon Sep 17 00:00:00 2001 From: cpviolator Date: Tue, 21 Dec 2021 23:24:10 -0800 Subject: [PATCH 30/32] Add precision parameter to QudaGaugeFixParam to specify gauge fixing precision, fix bug where gauge field boundary is not set to periodic --- include/quda.h | 2 ++ lib/gauge_fix_ovr.cu | 1 + lib/interface_quda.cpp | 2 +- tests/gauge_alg_test.cpp | 23 +++++++++++++++-------- tests/utils/host_utils.cpp | 7 +++++-- 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/include/quda.h b/include/quda.h index 0b7d633ec9..a706b46090 100644 --- a/include/quda.h +++ b/include/quda.h @@ -792,6 +792,8 @@ extern "C" { double tolerance; /**< The tolerance of the gauge fixing quality (default 1e-6) */ QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge 
fixing if true. If false, use the delta value (default false)" */ + QudaPrecision precision; /**< The precision used by the algorithm */ + } QudaGaugeFixParam; /* diff --git a/lib/gauge_fix_ovr.cu b/lib/gauge_fix_ovr.cu index aff0df2fef..38b7948f99 100644 --- a/lib/gauge_fix_ovr.cu +++ b/lib/gauge_fix_ovr.cu @@ -245,6 +245,7 @@ namespace quda { printfQuda("\tMaximum number of iterations: %d\n", steps); printfQuda("\tReunitarize at every %d steps\n", reunit_interval); printfQuda("\tPrint convergence results at every %d steps\n", verbose_interval); + printfQuda("\tComputing in %s precision\n", sizeof(Float) == sizeof(double) ? "double" : "single"); } const double unitarize_eps = 1e-14; diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 28fe842090..59c63d9b1a 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5546,7 +5546,7 @@ int computeGaugeFixingQuda(void *gauge, QudaGaugeParam *g_param, QudaGaugeFixPar gauge_param.create = QUDA_NULL_FIELD_CREATE; gauge_param.link_type = g_param->type; gauge_param.reconstruct = g_param->reconstruct; - gauge_param.setPrecision(gauge_param.Precision(), true); + gauge_param.setPrecision(fix_param->precision, true); auto *device_gauge = new cudaGaugeField(gauge_param); profileGaugeFix.TPSTOP(QUDA_PROFILE_INIT); diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index 5506f968d7..c64011341f 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -111,6 +111,7 @@ void setGaugeFixParam(QudaGaugeFixParam &fix_param) fix_param.fft_alpha = gf_fft_alpha; fix_param.fft_autotune = gf_fft_alpha ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; fix_param.theta_condition = gf_theta_condition ? 
QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + fix_param.precision = cuda_prec; } class GaugeAlgTest : public ::testing::Test @@ -161,11 +162,11 @@ class GaugeAlgTest : public ::testing::Test return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } - bool CheckDeterminant(double2 detu) + bool CheckDeterminant(double2 det) { - double prec_val = 5e-8; + double prec_val = 1.0e-5; if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2; - return (std::abs(1.0 - detu.x) < prec_val && std::abs(detu.y) < prec_val); + return (std::abs(1.0 - det.x) < prec_val && std::abs(det.y) < prec_val); } virtual void SetUp() @@ -190,7 +191,7 @@ class GaugeAlgTest : public ::testing::Test device_gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED; device_gauge_param.create = QUDA_NULL_FIELD_CREATE; device_gauge_param.reconstruct = link_recon; - device_gauge_param.setPrecision(prec, true); + device_gauge_param.setPrecision(cuda_prec, true); for (int d = 0; d < 4; d++) { if (comm_dim_partitioned(d)) device_gauge_param.r[d] = 2; device_gauge_param.x[d] += 2 * device_gauge_param.r[d]; @@ -344,14 +345,14 @@ class GaugeAlgTest : public ::testing::Test gauge_param.location = QUDA_CPU_FIELD_LOCATION; void *cpu_gauge[4]; - for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * gauge_param.cpu_prec); } + for (int dir = 0; dir < 4; dir++) { cpu_gauge[dir] = safe_malloc(V * gauge_site_size * cpu_prec); } GaugeFieldParam param(gauge_param); param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; param.create = QUDA_NULL_FIELD_CREATE; param.link_type = gauge_param.type; param.reconstruct = gauge_param.reconstruct; - param.setPrecision(param.Precision(), true); + param.setPrecision(cuda_prec, true); auto *gauge = new cudaGaugeField(param); @@ -379,6 +380,7 @@ class GaugeAlgTest : public ::testing::Test // Save if output string is specified if (gauge_store) save_gauge(); + saveTuneCache(); } } @@ -424,24 +426,26 @@ TEST_F(GaugeAlgTest, Generation) 
TEST_F(GaugeAlgTest, Landau_Overrelaxation) { if (execute) { - printfQuda("Landau gauge fixing with overrelaxation\n"); + printfQuda("Landau gauge fixing with overrelaxation method\n"); fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; fix_param.gauge_dir = 4; gaugeFixingOVR(*U, fix_param); + saveTuneCache(); } } TEST_F(GaugeAlgTest, Coulomb_Overrelaxation) { if (execute) { - printfQuda("Coulomb gauge fixing with overrelaxation\n"); + printfQuda("Coulomb gauge fixing with overrelaxation method\n"); fix_param.fix_type = QUDA_GAUGEFIX_TYPE_OVR; fix_param.gauge_dir = 3; gaugeFixingOVR(*U, fix_param); + saveTuneCache(); } } @@ -455,6 +459,7 @@ TEST_F(GaugeAlgTest, Landau_FFT) fix_param.gauge_dir = 4; gaugeFixingFFT(*U, fix_param); + saveTuneCache(); } } } @@ -469,6 +474,7 @@ TEST_F(GaugeAlgTest, Coulomb_FFT) fix_param.gauge_dir = 3; gaugeFixingFFT(*U, fix_param); + saveTuneCache(); } } } @@ -498,6 +504,7 @@ int main(int argc, char **argv) setVerbosity(verbosity); setQudaPrecisions(); setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; setDims(gauge_param.X); // call srand() with a rank-dependent seed diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index ff3d60ced5..640b501c70 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -980,8 +980,11 @@ int x4_from_full_index(int i) template void applyGaugeFieldScaling(Float **gauge, int Vh, QudaGaugeParam *param) { // Apply spatial scaling factor (u0) to spatial links - for (int d = 0; d < 3; d++) { - for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] /= param->anisotropy; } + if(param->anisotropy != 1.0) { + double aniso_inv = 1.0/param->anisotropy; + for (int d = 0; d < 3; d++) { + for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] *= aniso_inv; } + } } // Apply boundary conditions to temporal links From 7e83e14921ac9d17406d48bc8de86bfb872c341c Mon Sep 17 00:00:00 2001 From: cpviolator Date: Tue, 21 Dec 2021 23:24:37 -0800 Subject: 
[PATCH 31/32] clang tidy --- include/quda.h | 2 +- tests/utils/host_utils.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/quda.h b/include/quda.h index a706b46090..ced9d2dc25 100644 --- a/include/quda.h +++ b/include/quda.h @@ -793,7 +793,7 @@ extern "C" { QudaBoolean theta_condition; /**< "Use the theta value to determine the gauge fixing if true. If false, use the delta value (default false)" */ QudaPrecision precision; /**< The precision used by the algorithm */ - + } QudaGaugeFixParam; /* diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index 640b501c70..982ebd66cd 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -980,8 +980,8 @@ int x4_from_full_index(int i) template void applyGaugeFieldScaling(Float **gauge, int Vh, QudaGaugeParam *param) { // Apply spatial scaling factor (u0) to spatial links - if(param->anisotropy != 1.0) { - double aniso_inv = 1.0/param->anisotropy; + if (param->anisotropy != 1.0) { + double aniso_inv = 1.0 / param->anisotropy; for (int d = 0; d < 3; d++) { for (int i = 0; i < gauge_site_size * Vh * 2; i++) { gauge[d][i] *= aniso_inv; } } From b1e25b05b7f0842614630f4f09637aa1383a743d Mon Sep 17 00:00:00 2001 From: cpviolator Date: Tue, 21 Dec 2021 23:43:45 -0800 Subject: [PATCH 32/32] Adjust determinant tolerance for double precision --- tests/gauge_alg_test.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/gauge_alg_test.cpp b/tests/gauge_alg_test.cpp index c64011341f..e4ef084b2d 100644 --- a/tests/gauge_alg_test.cpp +++ b/tests/gauge_alg_test.cpp @@ -158,14 +158,14 @@ class GaugeAlgTest : public ::testing::Test a1 = std::abs(a.y - b.y); a2 = std::abs(a.z - b.z); double prec_val = 1.0e-5; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1e-10; return ((a0 < prec_val) && (a1 < prec_val) && (a2 < prec_val)); } - bool CheckDeterminant(double2 det) + bool 
checkDeterminant(double2 det) { double prec_val = 1.0e-5; - if (prec == QUDA_DOUBLE_PRECISION) prec_val = gf_tolerance * 1e2; + if (prec == QUDA_DOUBLE_PRECISION) prec_val = 1e-8; return (std::abs(1.0 - det.x) < prec_val && std::abs(det.y) < prec_val); } @@ -313,7 +313,7 @@ class GaugeAlgTest : public ::testing::Test // The determinant of any SU(N) gauge field element must be (1.0,0.0) to // machine precision - ASSERT_TRUE(CheckDeterminant(det_gf)); + ASSERT_TRUE(checkDeterminant(det_gf)); delete U; // Release all temporary memory used for data exchange between GPUs in multi-GPU mode @@ -419,7 +419,7 @@ TEST_F(GaugeAlgTest, Generation) // Assert that the generated gauge is // on the SU(N) manifold det_u = getLinkDeterminant(*U); - ASSERT_TRUE(CheckDeterminant(det_u)); + ASSERT_TRUE(checkDeterminant(det_u)); } }