diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7826d25cbf..66c42c9c2c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1124,20 +1124,6 @@ foreach(pol IN LISTS DSLASH_POLICIES) endforeach(pol) -# enable the precisions that are compiled -math(EXPR double_prec "${QUDA_PRECISION} & 8") -math(EXPR single_prec "${QUDA_PRECISION} & 4") -math(EXPR half_prec "${QUDA_PRECISION} & 2") -math(EXPR quarter_prec "${QUDA_PRECISION} & 1") - -if(double_prec AND single_prec) - set(TEST_PRECS single double) -elseif(double_prec) - set(TEST_PRECS double) -elseif(single_prec) - set(TEST_PRECS single) -endif() - # Wilson-type Inversions if(QUDA_DIRAC_WILSON) add_test(NAME invert_test_wilson @@ -1301,60 +1287,44 @@ if(QUDA_DIRAC_DOMAIN_WALL) endif() # Staggered-type Inversions -foreach(prec IN LISTS TEST_PRECS) +if(QUDA_DIRAC_STAGGERED) + # --compute-fat-long true is necessary to get well-behaved fields - # These require looser tolerances to keep iterations to solution in check - if(${prec} STREQUAL "double") - set(tol 1e-6) - elseif(${prec} STREQUAL "single") - set(tol 1e-5) - endif() + add_test(NAME invert_test_staggered + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dslash-type staggered --ngcrkrylov 8 --compute-fat-long true + --dim 2 4 6 8 --niter 1000 --enable-testing true + --gtest_output=xml:invert_test_staggered.xml) - if(QUDA_DIRAC_STAGGERED) - # --compute-fat-long true is necessary to get well-behaved fields + add_test(NAME invert_test_splitgrid_staggered + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dslash-type staggered --ngcrkrylov 8 --compute-fat-long true + --dim 2 4 6 8 --niter 1000 --nsrc ${QUDA_TEST_NUM_PROCS} --nsrc-tile ${QUDA_TEST_NUM_PROCS} + --enable-testing true + --gtest_output=xml:invert_test_splitgrid_staggered.xml) - add_test(NAME invert_test_staggered_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dslash-type staggered --ngcrkrylov 8 --compute-fat-long true - --dim 2 4 6 8 --prec ${prec} 
--tol ${tol} --tolhq ${tol} --niter 1000 - --enable-testing true - --gtest_output=xml:invert_test_staggered_${prec}.xml) + set_tests_properties(invert_test_splitgrid_staggered PROPERTIES ENVIRONMENT QUDA_TEST_GRID_PARTITION=$ENV{QUDA_TEST_GRID_SIZE}) - add_test(NAME invert_test_splitgrid_staggered_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dslash-type staggered --ngcrkrylov 8 --compute-fat-long true - --dim 2 4 6 8 --prec ${prec} --tol ${tol} --tolhq ${tol} --niter 1000 - --nsrc ${QUDA_TEST_NUM_PROCS} --nsrc-tile ${QUDA_TEST_NUM_PROCS} - --enable-testing true - --gtest_output=xml:invert_test_splitgrid_staggered_${prec}.xml) + add_test(NAME invert_test_asqtad + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dslash-type asqtad --ngcrkrylov 8 --compute-fat-long true + --dim 6 6 6 8 --niter 1000 --enable-testing true --nsrc 4 --nsrc-tile 4 + --gtest_output=xml:invert_test_asqtad.xml) - set_tests_properties(invert_test_splitgrid_staggered_${prec} PROPERTIES ENVIRONMENT QUDA_TEST_GRID_PARTITION=$ENV{QUDA_TEST_GRID_SIZE}) + add_test(NAME invert_test_splitgrid_asqtad + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dslash-type asqtad --ngcrkrylov 8 --compute-fat-long true + --dim 6 6 6 8 --niter 1000 --nsrc ${QUDA_TEST_NUM_PROCS} --enable-testing true + --gtest_output=xml:invert_test_splitgrid_asqtad.xml) - add_test(NAME invert_test_asqtad_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dslash-type asqtad --ngcrkrylov 8 --compute-fat-long true - --dim 6 6 6 8 --prec ${prec} --tol ${tol} --tolhq ${tol} --niter 1000 - --enable-testing true --nsrc 4 --nsrc-tile 4 - --gtest_output=xml:invert_test_asqtad_${prec}.xml) - - add_test(NAME invert_test_splitgrid_asqtad_${prec} + if (QUDA_DIRAC_LAPLACE) + add_test(NAME invert_test_laplace COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dslash-type asqtad --ngcrkrylov 8 --compute-fat-long true - --dim 6 6 6 8 --prec ${prec} --tol ${tol} --tolhq ${tol} --niter 1000 - --nsrc 
${QUDA_TEST_NUM_PROCS} - --enable-testing true - --gtest_output=xml:invert_test_splitgrid_asqtad_${prec}.xml) - - if (QUDA_DIRAC_LAPLACE) - add_test(NAME invert_test_laplace_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dslash-type laplace --ngcrkrylov 8 --compute-fat-long true - --dim 2 4 6 8 --prec ${prec} --tol ${tol} --tolhq ${tol} --niter 1000 - --enable-testing true - --gtest_output=xml:invert_test_laplace_${prec}.xml) - endif() + --dslash-type laplace --ngcrkrylov 8 --compute-fat-long true + --dim 2 4 6 8 --niter 1000 --enable-testing true + --gtest_output=xml:invert_test_laplace.xml) endif() -endforeach(prec) +endif() # Distance preconditioning for Wilson/clover if (QUDA_DIRAC_DISTANCE_PRECONDITIONING) @@ -1618,44 +1588,27 @@ add_test(NAME gauge_path --dim 2 4 6 8 --enable-testing true --niter 1 --gtest_output=xml:gauge_path_test.xml) -foreach(prec IN LISTS TEST_PRECS) - - if(QUDA_DIRAC_STAGGERED) - add_test(NAME unitarize_link_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dim 2 4 6 8 --prec ${prec} - --gtest_output=xml:unitarize_link_test_${prec}.xml) - - add_test(NAME hisq_paths_force_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dim 2 4 6 8 --prec ${prec} - --gtest_output=xml:hisq_paths_force_test_${prec}.xml) - - add_test(NAME hisq_unitarize_force_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dim 2 4 6 8 --prec ${prec} - --gtest_output=xml:hisq_unitarize_force_test_${prec}.xml) - - set(KERNEL_TYPE TwoLink GaussianSmear) - foreach(kerneltp IN LISTS KERNEL_TYPE) - if(${kerneltp} STREQUAL "TwoLink") - set(laplace3D 4) - set(smear_t0 -1) - elseif(${kerneltp} STREQUAL "GaussianSmear") - set(laplace3D 3) - set(smear_t0 1) - endif() - add_test(NAME staggered_gsmear_${kerneltp}_${prec} - COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} - --dim 6 8 10 12 --prec ${prec} - --test ${kerneltp} - --laplace3D ${laplace3D} - --smear-t0 ${smear_t0} - 
--gtest_output=xml:staggered_gsmear_test_${kerneltp}_${prec}.xml) - endforeach(kerneltp) - endif() - -endforeach(prec) +if(QUDA_DIRAC_STAGGERED) + add_test(NAME unitarize_link + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dim 2 4 6 8 --enable-testing true + --gtest_output=xml:unitarize_link_test.xml) + + add_test(NAME hisq_unitarize_force + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dim 2 4 6 8 --enable-testing true + --gtest_output=xml:hisq_unitarize_force_test.xml) + + add_test(NAME hisq_paths_force + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dim 2 4 6 8 --enable-testing true + --gtest_output=xml:hisq_paths_force_test.xml) + + add_test(NAME staggered_gsmear + COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} + --dim 6 8 10 12 --enable-testing true + --gtest_output=xml:staggered_gsmear_test.xml) +endif() add_test(NAME gauge_alg COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} diff --git a/tests/hisq_paths_force_test.cpp b/tests/hisq_paths_force_test.cpp index 3e9646b634..3b97b30978 100644 --- a/tests/hisq_paths_force_test.cpp +++ b/tests/hisq_paths_force_test.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include "host_utils.h" #include "momentum_utils.h" @@ -18,41 +18,10 @@ using namespace quda; -GaugeField *cpuGauge = NULL; -GaugeField *cudaForce = NULL; -GaugeField *cpuForce = NULL; -GaugeField *hostVerifyForce = NULL; - -GaugeField *cudaMom = NULL; -GaugeField *cpuMom = NULL; -GaugeField *refMom = NULL; - QudaGaugeFieldOrder gauge_order = QUDA_QDP_GAUGE_ORDER; -GaugeField *cpuOprod = NULL; -GaugeField *cudaOprod = NULL; -GaugeField *cpuLongLinkOprod = NULL; -GaugeField *cudaLongLinkOprod = NULL; - int ODD_BIT = 1; -QudaPrecision force_prec = QUDA_DOUBLE_PRECISION; - -GaugeField *cudaGauge_ex = NULL; -GaugeField *cpuGauge_ex = NULL; -GaugeField *cudaForce_ex = NULL; -GaugeField *cpuForce_ex = NULL; -GaugeField *cpuOprod_ex = NULL; -GaugeField *cudaOprod_ex = NULL; -GaugeField *cpuLongLinkOprod_ex 
= NULL; -GaugeField *cudaLongLinkOprod_ex = NULL; - -static void setPrecision(QudaPrecision precision) -{ - force_prec = precision; - return; -} - /** @brief Compute the aggregate bytes and flops for various components within the HISQ force @@ -156,15 +125,32 @@ static lat_dim_t R = {2, 2, 2, 2}; static lat_dim_t R = {0, 0, 0, 0}; #endif -// one-time initializations at start of tests -static void hisq_force_startup() +using test_t = ::testing::tuple; + +class HisqForceTest : public ::testing::TestWithParam +{ +protected: + QudaPrecision force_prec; + bool lepage; + +public: + HisqForceTest() : force_prec(::testing::get<0>(GetParam())), lepage(::testing::get<1>(GetParam())) { } +}; + +int hisq_force_test(QudaPrecision force_prec, bool lepage) { + if (force_prec != QUDA_DOUBLE_PRECISION && force_prec != QUDA_SINGLE_PRECISION) + errorQuda("Invalid precision %d", force_prec); + // FIXME: debugging recon 12 + if (link_recon != QUDA_RECONSTRUCT_NO && link_recon != QUDA_RECONSTRUCT_13) + errorQuda("Invalid reconstruct %d", link_recon); + int X[4] = {xdim, ydim, zdim, tdim}; setDims(X); setVerbosity(verbosity); - quda::RNG *rng; + quda::RNG rng; // initialize RNG { @@ -183,7 +169,7 @@ static void hisq_force_startup() param.location = QUDA_CPU_FIELD_LOCATION; for (int d = 0; d < 4; d++) param.x[d] = X[d]; quda::ColorSpinorField spinor_in(param); - rng = new quda::RNG(spinor_in, 1234); + rng = quda::RNG(spinor_in, 1234); } QudaGaugeParam qudaGaugeParam = newQudaGaugeParam(); @@ -192,7 +178,7 @@ static void hisq_force_startup() for (int d = 0; d < 4; d++) qudaGaugeParam.X[d] = X[d]; // need to do some thinking for recon - qudaGaugeParam.cpu_prec = force_prec; + qudaGaugeParam.cpu_prec = cpu_prec; qudaGaugeParam.cuda_prec = force_prec; qudaGaugeParam.reconstruct = (link_recon == QUDA_RECONSTRUCT_12 ? 
QUDA_RECONSTRUCT_13 : link_recon); qudaGaugeParam.type = QUDA_GENERAL_LINKS; @@ -216,11 +202,8 @@ static void hisq_force_startup() #endif qudaGaugeParam_ex.ga_pad = 3 * pad_size; // long links - GaugeFieldParam gParam_ex; - GaugeFieldParam gParam; - // create device gauge field - gParam_ex = GaugeFieldParam(qudaGaugeParam_ex); + GaugeFieldParam gParam_ex = GaugeFieldParam(qudaGaugeParam_ex); gParam_ex.location = QUDA_CUDA_FIELD_LOCATION; gParam_ex.ghostExchange = QUDA_GHOST_EXCHANGE_EXTENDED; gParam_ex.create = QUDA_NULL_FIELD_CREATE; @@ -230,18 +213,18 @@ static void hisq_force_startup() gParam_ex.r[d] = (comm_dim_partitioned(d)) ? 2 : 0; gParam_ex.x[d] = X[d] + 2 * gParam_ex.r[d]; } // set halo region for GPU - cudaGauge_ex = new GaugeField(gParam_ex); + GaugeField cudaGauge_ex(gParam_ex); // Create the host gauge field memcpy(&qudaGaugeParam_ex, &qudaGaugeParam, sizeof(QudaGaugeParam)); - gParam = GaugeFieldParam(qudaGaugeParam); + GaugeFieldParam gParam = GaugeFieldParam(qudaGaugeParam); gParam.location = QUDA_CPU_FIELD_LOCATION; gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; gParam.create = QUDA_NULL_FIELD_CREATE; gParam.link_type = QUDA_GENERAL_LINKS; gParam.order = gauge_order; - cpuGauge = new GaugeField(gParam); + GaugeField cpuGauge(gParam); gParam_ex = GaugeFieldParam(qudaGaugeParam_ex); gParam.location = QUDA_CPU_FIELD_LOCATION; @@ -253,14 +236,14 @@ static void hisq_force_startup() gParam_ex.r[d] = R[d]; gParam_ex.x[d] = gParam.x[d] + 2 * gParam_ex.r[d]; } // set halo region for CPU - cpuGauge_ex = new GaugeField(gParam_ex); + GaugeField cpuGauge_ex(gParam_ex); auto generated_link_type = (link_recon == QUDA_RECONSTRUCT_NO ? SiteLinkType::SITELINK_PHASE_NO : (link_recon == QUDA_RECONSTRUCT_13 ? 
SiteLinkType::SITELINK_PHASE_U1 : SiteLinkType::SITELINK_PHASE_MILC)); - createSiteLinkCPU(*cpuGauge, qudaGaugeParam.cpu_prec, generated_link_type); - copyExtendedGauge(*cpuGauge_ex, *cpuGauge, QUDA_CPU_FIELD_LOCATION); + createSiteLinkCPU(cpuGauge, qudaGaugeParam.cpu_prec, generated_link_type); + copyExtendedGauge(cpuGauge_ex, cpuGauge, QUDA_CPU_FIELD_LOCATION); qudaGaugeParam.type = QUDA_GENERAL_LINKS; qudaGaugeParam.t_boundary = QUDA_PERIODIC_T; @@ -279,16 +262,16 @@ static void hisq_force_startup() **************************/ gParam.location = QUDA_CPU_FIELD_LOCATION; gParam.reconstruct = QUDA_RECONSTRUCT_NO; - gParam.setPrecision(prec); + gParam.setPrecision(cpu_prec); gParam.create = QUDA_NULL_FIELD_CREATE; gParam.link_type = QUDA_GENERAL_LINKS; gParam.order = gauge_order; - cpuForce = new GaugeField(gParam); - hostVerifyForce = new GaugeField(gParam); + GaugeField cpuForce(gParam); + GaugeField hostVerifyForce(gParam); gParam_ex.location = QUDA_CPU_FIELD_LOCATION; gParam_ex.reconstruct = QUDA_RECONSTRUCT_NO; - gParam_ex.setPrecision(prec); + gParam_ex.setPrecision(cpu_prec); gParam_ex.create = QUDA_NULL_FIELD_CREATE; gParam_ex.link_type = QUDA_GENERAL_LINKS; gParam_ex.order = gauge_order; @@ -296,33 +279,32 @@ static void hisq_force_startup() gParam_ex.r[d] = R[d]; gParam_ex.x[d] = gParam.x[d] + 2 * gParam_ex.r[d]; } - cpuForce_ex = new GaugeField(gParam_ex); + GaugeField cpuForce_ex(gParam_ex); // create the momentum matrix gParam.location = QUDA_CPU_FIELD_LOCATION; gParam.reconstruct = QUDA_RECONSTRUCT_10; gParam.link_type = QUDA_ASQTAD_MOM_LINKS; - gParam.setPrecision(prec); + gParam.setPrecision(cpu_prec); gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; gParam.order = QUDA_MILC_GAUGE_ORDER; gParam.create = QUDA_NULL_FIELD_CREATE; - cpuMom = new GaugeField(gParam); - refMom = new GaugeField(gParam); + GaugeField cpuMom(gParam); + GaugeField refMom(gParam); /********************************** * Create the outer product fields * 
**********************************/ // Create four full-volume random spinor fields - void *stag_for_oprod = safe_malloc(4 * cpuGauge->Volume() * stag_spinor_site_size * force_prec); + std::vector stag_for_oprod(4 * cpuGauge.Volume() * stag_spinor_site_size * cpu_prec); // Allocate the outer product fields and populate them with the random spinor fields gParam.link_type = QUDA_GENERAL_LINKS; gParam.reconstruct = QUDA_RECONSTRUCT_NO; gParam.order = gauge_order; - cpuOprod = new GaugeField(gParam); - cpuLongLinkOprod = new GaugeField(gParam); - + GaugeField cpuOprod(gParam); + GaugeField cpuLongLinkOprod(gParam); // Create extended outer product fields gParam_ex.location = QUDA_CPU_FIELD_LOCATION; gParam_ex.link_type = QUDA_GENERAL_LINKS; @@ -332,97 +314,63 @@ static void hisq_force_startup() gParam_ex.r[d] = R[d]; gParam_ex.x[d] = gParam.x[d] + 2 * gParam_ex.r[d]; } // set halo region for CPU - cpuOprod_ex = new GaugeField(gParam_ex); - cpuLongLinkOprod_ex = new GaugeField(gParam_ex); + GaugeField cpuOprod_ex(gParam_ex); + GaugeField cpuLongLinkOprod_ex(gParam_ex); // initialize the CPU outer product fields and exchange once - createStagForOprodCPU(stag_for_oprod, force_prec, qudaGaugeParam.X, *rng); - computeLinkOrderedOuterProduct(stag_for_oprod, *cpuOprod, force_prec, 1); - computeLinkOrderedOuterProduct(stag_for_oprod, *cpuLongLinkOprod, force_prec, 3); + createStagForOprodCPU(stag_for_oprod.data(), cpu_prec, qudaGaugeParam.X, rng); + computeLinkOrderedOuterProduct(stag_for_oprod.data(), cpuOprod, 1); - copyExtendedGauge(*cpuOprod_ex, *cpuOprod, QUDA_CPU_FIELD_LOCATION); - copyExtendedGauge(*cpuLongLinkOprod_ex, *cpuLongLinkOprod, QUDA_CPU_FIELD_LOCATION); + computeLinkOrderedOuterProduct(stag_for_oprod.data(), cpuLongLinkOprod, 3); - // free the initial spinor field - host_free(stag_for_oprod); + copyExtendedGauge(cpuOprod_ex, cpuOprod, QUDA_CPU_FIELD_LOCATION); + copyExtendedGauge(cpuLongLinkOprod_ex, cpuLongLinkOprod, QUDA_CPU_FIELD_LOCATION); 
/************************** * Create remaining fields * ***************************/ gParam_ex.location = QUDA_CUDA_FIELD_LOCATION; gParam_ex.reconstruct = QUDA_RECONSTRUCT_NO; - gParam_ex.setPrecision(prec, true); + gParam_ex.setPrecision(force_prec, true); for (int d = 0; d < 4; d++) { gParam_ex.r[d] = (comm_dim_partitioned(d)) ? 2 : 0; gParam_ex.x[d] = gParam.x[d] + 2 * gParam_ex.r[d]; } // set halo region - cudaForce_ex = new GaugeField(gParam_ex); - cudaOprod_ex = new GaugeField(gParam_ex); - cudaLongLinkOprod_ex = new GaugeField(gParam_ex); + GaugeField cudaForce_ex(gParam_ex); + GaugeField cudaOprod_ex(gParam_ex); + GaugeField cudaLongLinkOprod_ex(gParam_ex); // create a device force for verify gParam.location = QUDA_CUDA_FIELD_LOCATION; gParam.create = QUDA_NULL_FIELD_CREATE; gParam.reconstruct = QUDA_RECONSTRUCT_NO; gParam.link_type = QUDA_GENERAL_LINKS; - gParam.setPrecision(prec, true); - cudaForce = new GaugeField(gParam); + gParam.setPrecision(force_prec, true); + GaugeField cudaForce(gParam); // create the device momentum field gParam.location = QUDA_CUDA_FIELD_LOCATION; gParam.create = QUDA_NULL_FIELD_CREATE; gParam.reconstruct = QUDA_RECONSTRUCT_10; gParam.link_type = QUDA_ASQTAD_MOM_LINKS; - gParam.setPrecision(prec, true); - cudaMom = new GaugeField(gParam); + gParam.setPrecision(force_prec, true); + GaugeField cudaMom(gParam); /******************************************************************** * Copy to and exchange gauge and outer product fields on the device * ********************************************************************/ - cpuGauge_ex->exchangeExtendedGhost(R, true); - cudaGauge_ex->copy(*cpuGauge); - cudaGauge_ex->exchangeExtendedGhost(cudaGauge_ex->R()); - - cpuOprod_ex->exchangeExtendedGhost(R, true); - cudaOprod_ex->copy(*cpuOprod); - cudaOprod_ex->exchangeExtendedGhost(cudaOprod_ex->R()); - - cpuLongLinkOprod_ex->exchangeExtendedGhost(R, true); - cudaLongLinkOprod_ex->copy(*cpuLongLinkOprod); - 
cudaLongLinkOprod_ex->exchangeExtendedGhost(cudaLongLinkOprod_ex->R()); - - /********************** - * Do a little cleanup * - **********************/ - delete rng; -} + cpuGauge_ex.exchangeExtendedGhost(R, true); + cudaGauge_ex.copy(cpuGauge); + cudaGauge_ex.exchangeExtendedGhost(cudaGauge_ex.R()); -// one-time teardown at end of tests -static void hisq_force_teardown() -{ - delete cudaMom; - delete cudaForce; - delete cudaForce_ex; - delete cudaGauge_ex; - delete cudaOprod_ex; - delete cudaLongLinkOprod_ex; - - delete cpuGauge; - delete cpuForce; - delete hostVerifyForce; - delete cpuMom; - delete refMom; - delete cpuOprod; - delete cpuLongLinkOprod; - - delete cpuGauge_ex; - delete cpuForce_ex; - delete cpuOprod_ex; - delete cpuLongLinkOprod_ex; -} + cpuOprod_ex.exchangeExtendedGhost(R, true); + cudaOprod_ex.copy(cpuOprod); + cudaOprod_ex.exchangeExtendedGhost(cudaOprod_ex.R()); + + cpuLongLinkOprod_ex.exchangeExtendedGhost(R, true); + cudaLongLinkOprod_ex.copy(cpuLongLinkOprod); + cudaLongLinkOprod_ex.exchangeExtendedGhost(cudaLongLinkOprod_ex.R()); -static int hisq_force_test(bool lepage) -{ // float d_weight = 1.0; // { one, naik, three, five, seven, lepage } // double d_act_path_coeff[6] = { 1., 0., 0., 0., 0., 0. 
}; @@ -438,41 +386,41 @@ static int hisq_force_test(bool lepage) /******************************** * Zero momenta and force fields * ********************************/ - cpuForce->zero(); - cpuForce_ex->zero(); - cpuMom->zero(); - refMom->zero(); + cpuForce.zero(); + cpuForce_ex.zero(); + cpuMom.zero(); + refMom.zero(); - cudaForce->zero(); - cudaForce_ex->zero(); - cudaMom->zero(); + cudaForce.zero(); + cudaForce_ex.zero(); + cudaMom.zero(); /************************************** * Force contribution from the staples * **************************************/ host_timer.start(); - fermion_force::hisqStaplesForce(*cudaForce_ex, *cudaOprod_ex, *cudaGauge_ex, d_act_path_coeff); + fermion_force::hisqStaplesForce(cudaForce_ex, cudaOprod_ex, cudaGauge_ex, d_act_path_coeff); qudaDeviceSynchronize(); host_timer.stop(); staple_time_sec = host_timer.last(); if (verify_results) { host_timer.start(); - hisqStaplesForceCPU(d_act_path_coeff, *cpuOprod_ex, *cpuGauge_ex, cpuForce_ex); + hisqStaplesForceCPU(d_act_path_coeff, cpuOprod_ex, cpuGauge_ex, &cpuForce_ex); host_timer.stop(); host_time_sec = host_timer.last(); - copyExtendedGauge(*cpuForce, *cpuForce_ex, QUDA_CPU_FIELD_LOCATION); - copyExtendedGauge(*cudaForce, *cudaForce_ex, QUDA_CUDA_FIELD_LOCATION); - hostVerifyForce->copy(*cudaForce); + copyExtendedGauge(cpuForce, cpuForce_ex, QUDA_CPU_FIELD_LOCATION); + copyExtendedGauge(cudaForce, cudaForce_ex, QUDA_CUDA_FIELD_LOCATION); + hostVerifyForce.copy(cudaForce); int res = 1; for (int dir = 0; dir < 4; dir++) { - res &= compare_floats(cpuForce->data(dir), hostVerifyForce->data(dir), V * gauge_site_size, - getTolerance(force_prec), force_prec); + res &= compare_floats(cpuForce.data(dir), hostVerifyForce.data(dir), V * gauge_site_size, + getTolerance(force_prec), cpu_prec); } - strong_check_link(*hostVerifyForce, "GPU result:", *cpuForce, "CPU reference results:"); + strong_check_link(hostVerifyForce, "GPU result:", cpuForce, "CPU reference results:"); logQuda(QUDA_SUMMARIZE, 
"Lepage %s staples force test %s\n\n", lepage ? "enabled" : "disabled", (1 == res) ? "PASSED" : "FAILED"); } @@ -485,53 +433,53 @@ static int hisq_force_test(bool lepage) // This is consistent with the chain rule for HISQ if (lepage && d_act_path_coeff[1] != 0.) { host_timer.start(); - fermion_force::hisqLongLinkForce(*cudaForce_ex, *cudaLongLinkOprod_ex, *cudaGauge_ex, d_act_path_coeff[1]); + fermion_force::hisqLongLinkForce(cudaForce_ex, cudaLongLinkOprod_ex, cudaGauge_ex, d_act_path_coeff[1]); qudaDeviceSynchronize(); host_timer.stop(); long_time_sec = host_timer.last(); if (verify_results) { host_timer.start(); - hisqLongLinkForceCPU(d_act_path_coeff[1], *cpuLongLinkOprod_ex, *cpuGauge_ex, cpuForce_ex); + hisqLongLinkForceCPU(d_act_path_coeff[1], cpuLongLinkOprod_ex, cpuGauge_ex, &cpuForce_ex); host_timer.stop(); host_time_sec += host_timer.last(); - copyExtendedGauge(*cpuForce, *cpuForce_ex, QUDA_CPU_FIELD_LOCATION); - copyExtendedGauge(*cudaForce, *cudaForce_ex, QUDA_CUDA_FIELD_LOCATION); - hostVerifyForce->copy(*cudaForce); + copyExtendedGauge(cpuForce, cpuForce_ex, QUDA_CPU_FIELD_LOCATION); + copyExtendedGauge(cudaForce, cudaForce_ex, QUDA_CUDA_FIELD_LOCATION); + hostVerifyForce.copy(cudaForce); int res = 1; for (int dir = 0; dir < 4; dir++) { - res &= compare_floats(cpuForce->data(dir), hostVerifyForce->data(dir), V * gauge_site_size, - getTolerance(force_prec), force_prec); + res &= compare_floats(cpuForce.data(dir), hostVerifyForce.data(dir), V * gauge_site_size, + getTolerance(force_prec), cpu_prec); } - strong_check_link(*hostVerifyForce, "GPU results: ", *cpuForce, "CPU reference results:"); + strong_check_link(hostVerifyForce, "GPU results: ", cpuForce, "CPU reference results:"); logQuda(QUDA_SUMMARIZE, "Long link force test %s\n\n", (1 == res) ? 
"PASSED" : "FAILED"); } } host_timer.start(); - fermion_force::hisqCompleteForce(*cudaForce_ex, *cudaGauge_ex); - updateMomentum(*cudaMom, 1.0, *cudaForce_ex, __func__); + fermion_force::hisqCompleteForce(cudaForce_ex, cudaGauge_ex); + updateMomentum(cudaMom, 1.0, cudaForce_ex, __func__); qudaDeviceSynchronize(); host_timer.stop(); complete_time_sec = host_timer.last(); if (verify_results) { host_timer.start(); - hisqCompleteForceCPU(*cpuForce_ex, *cpuGauge_ex, refMom); + hisqCompleteForceCPU(cpuForce_ex, cpuGauge_ex, &refMom); host_timer.stop(); host_time_sec += host_timer.last(); - cpuMom->copy(*cudaMom); + cpuMom.copy(cudaMom); } int accuracy_level = 3; if (verify_results) { - int res = compare_floats(cpuMom->data(), refMom->data(), 4 * cpuMom->Volume() * mom_site_size, - getTolerance(force_prec), force_prec); - accuracy_level = strong_check_mom(cpuMom->data(), refMom->data(), 4 * cpuMom->Volume(), force_prec); + int res = compare_floats(cpuMom.data(), refMom.data(), 4 * cpuMom.Volume() * mom_site_size, + getTolerance(force_prec), cpu_prec); + accuracy_level = strong_check_mom(cpuMom.data(), refMom.data(), 4 * cpuMom.Volume(), cpu_prec); logQuda(QUDA_SUMMARIZE, "Test (lepage coeff %e) %s\n", d_act_path_coeff[5], (1 == res) ? 
"PASSED" : "FAILED"); } long long staple_io, staple_flops, long_io, long_flops, complete_io, complete_flops; @@ -564,77 +512,54 @@ static int hisq_force_test(bool lepage) return accuracy_level; } -static void display_test_info() +TEST_P(HisqForceTest, verify) { - printfQuda("running the following fermion force computation test:\n"); - - printfQuda( - "force_precision link_reconstruct space_dim(x/y/z) T_dimension Gauge_order\n"); - printfQuda("%s %s %d/%d/%d %d %s\n", - get_prec_str(force_prec), get_recon_str(link_recon), xdim, ydim, zdim, tdim, - get_gauge_order_str(gauge_order)); + if (!quda::is_enabled(force_prec)) GTEST_SKIP(); + int level = hisq_force_test(force_prec, lepage); + int tolerance = getNegLog10Tolerance(force_prec); + ASSERT_GE(level, tolerance) << "CPU and GPU implementations do not agree"; } -TEST(paths, verify) -{ - int level = hisq_force_test(true); - // prevent tests from failing when verify is set to false - if (verify_results) { - int tolerance = getNegLog10Tolerance(force_prec); - ASSERT_GE(level, tolerance) << "CPU and GPU implementations do not agree"; - } -} +auto test_str = [](testing::TestParamInfo param) { + return std::string(get_prec_str(::testing::get<0>(param.param))) + (::testing::get<1>(param.param) ? 
"_lepage" : ""); +}; -TEST(paths_no_lepage, verify) -{ - int level = hisq_force_test(false); - // prevent tests from failing when verify is set to false - if (verify_results) { - int tolerance = getNegLog10Tolerance(force_prec); - ASSERT_GE(level, tolerance) << "CPU and GPU implementations do not agree"; - } -} - -int main(int argc, char **argv) -{ - // initalize google test - ::testing::InitGoogleTest(&argc, argv); +INSTANTIATE_TEST_SUITE_P(, HisqForceTest, + ::testing::Combine(::testing::Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), + ::testing::Values(false, true)), + test_str); - auto app = make_app(); +static bool do_lepage = false; - try { - app->parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app->exit(e); +struct hisq_path_force_test : public quda_test { + void display_info() const override + { + printfQuda( + "force_precision link_reconstruct space_dim(x/y/z) T_dimension Gauge_order\n"); + printfQuda("%s %s %d/%d/%d %d %s\n", + get_prec_str(prec), get_recon_str(link_recon), xdim, ydim, zdim, tdim, get_gauge_order_str(gauge_order)); } - initComms(argc, argv, gridsize_from_cmdline); - initRand(); - initQuda(device_ordinal); - - setPrecision(prec); - - display_test_info(); - - if (prec != QUDA_DOUBLE_PRECISION && prec != QUDA_SINGLE_PRECISION) errorQuda("Invalid precision %d", prec); - // FIXME: debugging recon 12 - if (link_recon != QUDA_RECONSTRUCT_NO && link_recon != QUDA_RECONSTRUCT_13 /* && link_recon != QUDA_RECONSTRUCT_12*/) - errorQuda("Invalid reconstruct %d", link_recon); - - // one-time setup - hisq_force_startup(); - - // Ensure gtest prints only from rank 0 - ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); - if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } - - int test_rc = RUN_ALL_TESTS(); - if (test_rc != 0) warningQuda("Tests failed"); + void add_command_line_group(std::shared_ptr app) const override + { + 
quda_test::add_command_line_group(app); + app->add_option("--lepage", do_lepage, "Include the Lepage term in the force computation (default false)"); + } - hisq_force_teardown(); + hisq_path_force_test(int argc, char **argv) : quda_test("hisq_path_force_test", argc, argv) { } +}; - endQuda(); - finalizeComms(); +int main(int argc, char **argv) +{ + hisq_path_force_test test(argc, argv); + test.init(); + + int test_rc = 0; + if (!enable_testing) { + hisq_force_test(prec, do_lepage); + } else { + test_rc = test.execute(); + } return test_rc; } diff --git a/tests/hisq_unitarize_force_test.cpp b/tests/hisq_unitarize_force_test.cpp index fe4b927223..46e846f774 100644 --- a/tests/hisq_unitarize_force_test.cpp +++ b/tests/hisq_unitarize_force_test.cpp @@ -1,35 +1,34 @@ #include #include #include +#include #include +#include +#include + #include "host_utils.h" -#include -#include "gauge_field.h" +#include "command_line_params.h" #include "misc.h" #include "test.h" #include "hisq_force_reference.h" #include "ks_improved_force.h" -#include -#include - -quda::GaugeField *cudaFatLink = NULL; -quda::GaugeField *cpuFatLink = NULL; -quda::GaugeField *cudaOprod = NULL; -quda::GaugeField *cpuOprod = NULL; +using test_t = ::testing::tuple; -quda::GaugeField *cudaResult = NULL; -quda::GaugeField *cpuResult = NULL; - -quda::GaugeField *cpuReference = NULL; +class HisqUnitarizeTest : public ::testing::TestWithParam +{ +protected: + QudaPrecision precision; -static QudaGaugeParam gaugeParam; +public: + HisqUnitarizeTest() : precision(::testing::get<0>(GetParam())) { } +}; -// allocate memory -// set the layout, etc. 
-static void hisq_force_init() +void hisq_unitarize(QudaPrecision prec) { + QudaGaugeParam gaugeParam = newQudaGaugeParam(); + gaugeParam.X[0] = xdim; gaugeParam.X[1] = ydim; gaugeParam.X[2] = zdim; @@ -42,51 +41,34 @@ static void hisq_force_init() gaugeParam.cuda_prec = prec; gaugeParam.reconstruct = link_recon; gaugeParam.gauge_order = QUDA_QDP_GAUGE_ORDER; + gaugeParam.type = QUDA_WILSON_LINKS; quda::GaugeFieldParam gParam(gaugeParam); gParam.create = QUDA_ZERO_FIELD_CREATE; gParam.link_type = QUDA_GENERAL_LINKS; gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; gParam.anisotropy = 1; - cpuFatLink = new quda::GaugeField(gParam); - cpuOprod = new quda::GaugeField(gParam); - cpuResult = new quda::GaugeField(gParam); - cpuReference = new quda::GaugeField(gParam); + quda::GaugeField cpuFatLink = quda::GaugeField(gParam); + + auto cpuOprod = quda::GaugeField(gParam); + auto cpuResult = quda::GaugeField(gParam); + auto cpuReference = quda::GaugeField(gParam); // create "gauge fields" - createSiteLinkCPU(*cpuFatLink, gaugeParam.cpu_prec, SiteLinkType::SITELINK_NOISY); - createSiteLinkCPU(*cpuOprod, gaugeParam.cpu_prec, SiteLinkType::SITELINK_NOISY); + createSiteLinkCPU(cpuFatLink, gaugeParam.cpu_prec, SiteLinkType::SITELINK_NOISY); + createSiteLinkCPU(cpuOprod, gaugeParam.cpu_prec, SiteLinkType::SITELINK_NOISY); gParam.location = QUDA_CUDA_FIELD_LOCATION; gParam.setPrecision(gaugeParam.cuda_prec, true); - cudaFatLink = new quda::GaugeField(gParam); - cudaOprod = new quda::GaugeField(gParam); - cudaResult = new quda::GaugeField(gParam); + auto cudaFatLink = quda::GaugeField(gParam); + auto cudaOprod = quda::GaugeField(gParam); + auto cudaResult = quda::GaugeField(gParam); gParam.order = QUDA_QDP_GAUGE_ORDER; - cudaFatLink->copy(*cpuFatLink); - cudaOprod->copy(*cpuOprod); -} - -static void hisq_force_end() -{ - delete cpuFatLink; - delete cpuOprod; - delete cpuResult; - - delete cudaFatLink; - delete cudaOprod; - delete cudaResult; - - delete cpuReference; -} - 
-TEST(hisq_force_unitarize, verify) -{ - setVerbosity(verbosity); - hisq_force_init(); + cudaFatLink.copy(cpuFatLink); + cudaOprod.copy(cpuOprod); double unitarize_eps = 1e-5; const double hisq_force_filter = 5e-5; @@ -103,64 +85,65 @@ TEST(hisq_force_unitarize, verify) qudaMemset(num_failures_dev, 0, sizeof(int)); printfQuda("Calling unitarizeForce\n"); - quda::fermion_force::unitarizeForce(*cudaResult, *cudaOprod, *cudaFatLink, num_failures_dev); + quda::fermion_force::unitarizeForce(cudaResult, cudaOprod, cudaFatLink, num_failures_dev); device_free(num_failures_dev); if (verify_results) { printfQuda("Calling unitarizeForceCPU\n"); - quda::fermion_force::unitarizeForceCPU(*cpuResult, *cpuOprod, *cpuFatLink); + quda::fermion_force::unitarizeForceCPU(cpuResult, cpuOprod, cpuFatLink); } - cpuReference->copy(*cudaResult); + cpuReference.copy(cudaResult); printfQuda("Comparing CPU and GPU results\n"); int res[4]; double accuracy = prec == QUDA_DOUBLE_PRECISION ? 1e-10 : 1e-5; for (int dir = 0; dir < 4; ++dir) { - res[dir] = compare_floats(cpuReference->data(dir), cpuResult->data(dir), - cpuReference->Volume() * gauge_site_size, accuracy, gaugeParam.cpu_prec); + res[dir] = compare_floats(cpuReference.data(dir), cpuResult.data(dir), + cpuReference.Volume() * gauge_site_size, accuracy, gaugeParam.cpu_prec); quda::comm_allreduce_int(res[dir]); res[dir] /= quda::comm_size(); } - hisq_force_end(); - for (int dir = 0; dir < 4; ++dir) { ASSERT_EQ(res[dir], 1) << "Dir:" << dir; } } -static void display_test_info() +TEST_P(HisqUnitarizeTest, verify) { - printfQuda("running the following fermion force computation test:\n"); - - printfQuda("link_precision link_reconstruct space_dim(x/y/z) T_dimension\n"); - printfQuda("%s %s %d/%d/%d %d \n", get_prec_str(prec), - get_recon_str(link_recon), xdim, ydim, zdim, tdim); + prec = ::testing::get<0>(GetParam()); + if (!quda::is_enabled(prec)) GTEST_SKIP(); + hisq_unitarize(prec); } -int main(int argc, char **argv) -{ - // initalize google 
test - ::testing::InitGoogleTest(&argc, argv); - - auto app = make_app(); - try { - app->parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app->exit(e); - } - - initComms(argc, argv, gridsize_from_cmdline); - initQuda(device_ordinal); +auto test_str + = [](testing::TestParamInfo param) { return std::string(get_prec_str(::testing::get<0>(param.param))); }; - display_test_info(); +INSTANTIATE_TEST_SUITE_P(, HisqUnitarizeTest, ::testing::Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), test_str); - int test_rc = RUN_ALL_TESTS(); +struct hisq_unitarize_test : public quda_test { + void display_info() const override + { + printfQuda("link_precision link_reconstruct space_dim(x/y/z) T_dimension\n"); + printfQuda("%s %s %d/%d/%d %d \n", + get_prec_str(prec), get_recon_str(link_recon), xdim, ydim, zdim, tdim); + } - endQuda(); - finalizeComms(); + hisq_unitarize_test(int argc, char **argv) : quda_test("hisq_unitarize_test", argc, argv) { } +}; +int main(int argc, char **argv) +{ + hisq_unitarize_test test(argc, argv); + test.init(); + int test_rc = 0; + + if (!enable_testing) { + hisq_unitarize(prec); + } else { + test_rc = test.execute(); + } return test_rc; } diff --git a/tests/host_reference/gauge_force_reference.cpp b/tests/host_reference/gauge_force_reference.cpp index c7b00d73fa..14bb13ee3f 100644 --- a/tests/host_reference/gauge_force_reference.cpp +++ b/tests/host_reference/gauge_force_reference.cpp @@ -101,10 +101,10 @@ template struct ComputePathProduct { const void *const loop_coeff_, int coeff_index, int dir, const lattice_t &lat) { using matrix = Matrix<3, std::complex>; - auto sitelink = reinterpret_cast(sitelink_); + auto sitelink = reinterpret_cast(sitelink_); - auto staple = reinterpret_cast(staple_); - auto loop_coeff = reinterpret_cast(loop_coeff_); + auto staple = reinterpret_cast(staple_); + auto loop_coeff = reinterpret_cast(loop_coeff_); auto coeff = loop_coeff[coeff_index]; #pragma omp parallel for @@ -144,7 +144,7 @@ template 
struct ComputeLoopTrace { const lattice_t &lat) { using matrix = Matrix<3, std::complex>; - auto sitelink = reinterpret_cast(sitelink_); + auto sitelink = reinterpret_cast(sitelink_); std::complex accum = 0; @@ -185,9 +185,9 @@ template struct UpdateMomentum { { using matrix = Matrix<3, std::complex>; - auto momentum = reinterpret_cast *const>(momentum_); - auto sitelink = reinterpret_cast(sitelink_); - auto staple = reinterpret_cast(staple_); + auto momentum = reinterpret_cast *>(momentum_); + auto sitelink = reinterpret_cast(sitelink_); + auto staple = reinterpret_cast(staple_); #pragma omp parallel for for (size_t i = 0; i < lat.volume; i++) { @@ -228,9 +228,9 @@ template struct UpdateGauge { { using matrix = Matrix<3, std::complex>; - auto gauge = reinterpret_cast(gauge_); - auto sitelink = reinterpret_cast(sitelink_); - auto staple = reinterpret_cast(staple_); + auto gauge = reinterpret_cast(gauge_); + auto sitelink = reinterpret_cast(sitelink_); + auto staple = reinterpret_cast(staple_); #pragma omp parallel for for (size_t i = 0; i < lat.volume; i++) { diff --git a/tests/host_reference/hisq_force_reference.cpp b/tests/host_reference/hisq_force_reference.cpp index 6b761d6925..489612d030 100644 --- a/tests/host_reference/hisq_force_reference.cpp +++ b/tests/host_reference/hisq_force_reference.cpp @@ -18,7 +18,7 @@ extern int Vh; template struct ComputeLinkOrderedOuterProduct { void operator()(const void *const src_, quda::GaugeField &dest, size_t nhops) { - auto src = reinterpret_cast *const>(src_); + auto src = reinterpret_cast *>(src_); #pragma omp parallel for for (int i = 0; i < V; ++i) { @@ -35,9 +35,9 @@ template struct ComputeLinkOrderedOuterProduct { } }; -void computeLinkOrderedOuterProduct(void *src, quda::GaugeField &dst, QudaPrecision precision, size_t nhops) +void computeLinkOrderedOuterProduct(void *src, quda::GaugeField &dst, size_t nhops) { - instantiate_host(precision, src, dst, nhops); + instantiate_host(dst.Precision(), src, dst, nhops); } 
#define RETURN_IF_ERR \ @@ -814,9 +814,9 @@ template struct HisqStaplesForce { void hisqStaplesForceCPU(const double *path_coeff, quda::GaugeField &oprod, quda::GaugeField &link, quda::GaugeField *newOprod) { + auto precision = quda::checkPrecision(oprod, link, *newOprod); int X_[4]; for (int d = 0; d < 4; d++) X_[d] = oprod.X()[d] - 2 * oprod.R()[d]; - QudaPrecision precision = oprod.Precision(); uint64_t len = is_multi_gpu() ? (2 * Vh_ex) : (X_[0] * X_[1] * X_[2] * X_[3]); @@ -890,9 +890,9 @@ template struct ComputeLongLinkField { void operator()(const int dim[4], const void *const *const oprod_, const void *const *const link_, int sig, real_t coeff, void *const *const output_) { - auto oprod = reinterpret_cast(oprod_); - auto link = reinterpret_cast(link_); - auto output = reinterpret_cast(output_); + auto oprod = reinterpret_cast(oprod_); + auto link = reinterpret_cast(link_); + auto output = reinterpret_cast(output_); int volume = 1; for (int dir = 0; dir < 4; ++dir) volume *= dim[dir]; @@ -913,9 +913,9 @@ template struct ComputeLongLinkField { void hisqLongLinkForceCPU(double coeff, quda::GaugeField &oprod, quda::GaugeField &link, quda::GaugeField *newOprod) { + auto precision = quda::checkPrecision(oprod, link, *newOprod); int X_[4]; for (int d = 0; d < 4; d++) X_[d] = oprod.X()[d] - 2 * oprod.R()[d]; - QudaPrecision precision = oprod.Precision(); for (int sig = 0; sig < 4; ++sig) { instantiate_host(precision, X_, oprod.data_array().data, link.data_array().data, sig, coeff, @@ -947,9 +947,9 @@ template struct CompleteForceField { void operator()(const int dim[4], const void *const *const oprod_, const void *const *const link_, int sig, void *const mom_) { - auto oprod = reinterpret_cast(oprod_); - auto link = reinterpret_cast(link_); - auto mom = reinterpret_cast(mom_); + auto oprod = reinterpret_cast(oprod_); + auto link = reinterpret_cast(link_); + auto mom = reinterpret_cast(mom_); int volume = dim[0] * dim[1] * dim[2] * dim[3]; const int half_volume = 
volume / 2; @@ -964,12 +964,12 @@ template struct CompleteForceField { void hisqCompleteForceCPU(quda::GaugeField &oprod, quda::GaugeField &link, quda::GaugeField *mom) { + auto precision = quda::checkPrecision(oprod, link, *mom); int X_[4]; for (int d = 0; d < 4; d++) X_[d] = oprod.X()[d] - 2 * oprod.R()[d]; - QudaPrecision precision = oprod.Precision(); for (int sig = 0; sig < 4; ++sig) { instantiate_host(precision, X_, oprod.data_array().data, link.data_array().data, sig, mom->data()); } // loop over sig -} \ No newline at end of file +} diff --git a/tests/host_reference/hisq_force_reference.h b/tests/host_reference/hisq_force_reference.h index bdf78c4750..06052df319 100644 --- a/tests/host_reference/hisq_force_reference.h +++ b/tests/host_reference/hisq_force_reference.h @@ -12,7 +12,7 @@ @param[in] precision Precision of data (single or double) @param[in] separation Offset for outer product (1 for fat links, 3 for long links) */ -void computeLinkOrderedOuterProduct(void *src, quda::GaugeField &dest, QudaPrecision precision, size_t separation); +void computeLinkOrderedOuterProduct(void *src, quda::GaugeField &dest, size_t separation); /** @brief Compute the force contribution from the fat links, CPU version diff --git a/tests/staggered_gsmear_test.cpp b/tests/staggered_gsmear_test.cpp index 84f793dcb9..cdcab64d9c 100644 --- a/tests/staggered_gsmear_test.cpp +++ b/tests/staggered_gsmear_test.cpp @@ -1,4 +1,3 @@ -#include "test.h" #include "staggered_gsmear_test_utils.h" using namespace quda; @@ -6,43 +5,52 @@ using namespace quda; int argc_copy; char **argv_copy; -class StaggeredGSmearTest : public ::testing::Test +using test_t = ::testing::tuple; + +class StaggeredGSmearTest : public ::testing::TestWithParam { protected: StaggeredGSmearTestWrapper gsmear_test_wrapper; - void display_test_info() - { - printfQuda("running the following test:\n"); - printfQuda("prec recon test_type S_dim T_dimension\n"); - printfQuda("%s %s %s %d/%d/%d %d \n", get_prec_str(prec), 
get_recon_str(link_recon), - get_string(gtest_type_map, gtest_type).c_str(), xdim, ydim, zdim, tdim); - printfQuda("Grid partition info: X Y Z T\n"); - printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), - dimPartitioned(3)); - } - public: StaggeredGSmearTest() = default; virtual void SetUp() { + prec = ::testing::get<0>(GetParam()); + gtest_type = ::testing::get<1>(GetParam()); + if (!quda::is_enabled(prec)) GTEST_SKIP(); gsmear_test_wrapper.init_test(argc_copy, argv_copy); - display_test_info(); } - virtual void TearDown() { gsmear_test_wrapper.end(); } + virtual void TearDown() + { + if (!quda::is_enabled(prec)) GTEST_SKIP(); + gsmear_test_wrapper.end(); + } - static void SetUpTestCase() { initQuda(device_ordinal); } + static void SetUpTestCase() { } - static void TearDownTestCase() { endQuda(); } + static void TearDownTestCase() { } }; -TEST_F(StaggeredGSmearTest, benchmark) { gsmear_test_wrapper.run_test(niter, /**show_metrics =*/true); } - -TEST_F(StaggeredGSmearTest, verify) +TEST_P(StaggeredGSmearTest, verify) { - if (!verify_results) GTEST_SKIP(); + prec = ::testing::get<0>(GetParam()); + gtest_type = ::testing::get<1>(GetParam()); + if (!quda::is_enabled(prec)) GTEST_SKIP(); + + switch (gtest_type) { + case gsmear_test_type::TwoLink: + laplace3D = 4; + smear_t0 = -1; + break; + case gsmear_test_type::GaussianSmear: + laplace3D = 3; + smear_t0 = 1; + break; + default: errorQuda("Unexpected gsmear_type = %s", get_string(gtest_type_map, gtest_type).c_str()); + } gsmear_test_wrapper.staggeredGSmearRef(); gsmear_test_wrapper.run_test(2); @@ -52,41 +60,55 @@ TEST_F(StaggeredGSmearTest, verify) ASSERT_LE(deviation, tol) << "reference and QUDA implementations do not agree"; } +struct gsmear_test : public quda_test { + void display_info() const override + { + printfQuda("prec recon test_type S_dim T_dimension\n"); + printfQuda("%s %s %s %d/%d/%d %d \n", get_prec_str(prec), get_recon_str(link_recon), + get_string(gtest_type_map, 
gtest_type).c_str(), xdim, ydim, zdim, tdim); + } -int main(int argc, char **argv) -{ - // initalize google test - ::testing::InitGoogleTest(&argc, argv); - - // command line options - auto app = make_app(); - app->add_option("--test", gtest_type, "Test method")->transform(CLI::CheckedTransformer(gtest_type_map)); - add_quark_smear_option_group(app); - add_su3_option_group(app); - add_comms_option_group(app); - try { - app->parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app->exit(e); + void add_command_line_group(std::shared_ptr app) const override + { + quda_test::add_command_line_group(app); + app->add_option("--test", gtest_type, "Test method")->transform(CLI::CheckedTransformer(gtest_type_map)); + add_quark_smear_option_group(app); + add_su3_option_group(app); } - initComms(argc, argv, gridsize_from_cmdline); + gsmear_test(int argc, char **argv) : quda_test("gsmear_test", argc, argv) { } +}; + +auto test_str = [](testing::TestParamInfo param) { + return std::string(get_prec_str(::testing::get<0>(param.param))) + "_" + + get_string(gtest_type_map, ::testing::get<1>(param.param)); +}; + +using ::testing::Combine; +using ::testing::Values; + +INSTANTIATE_TEST_SUITE_P(, StaggeredGSmearTest, + Combine(Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), + Values(gsmear_test_type::TwoLink, gsmear_test_type::GaussianSmear)), + test_str); + +int main(int argc, char **argv) +{ + gsmear_test test(argc, argv); + test.init(); // Same approach as in Staggered DslashTest argc_copy = argc; argv_copy = argv; - // Ensure gtest prints only from rank 0 - ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); - if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } + if (link_recon != QUDA_RECONSTRUCT_NO) errorQuda("Error: link reconstruction is currently not supported"); - if (link_recon != QUDA_RECONSTRUCT_NO) { - printfQuda("Error: link reconstruction is currently not supported.\n"); - 
exit(0); - } + int test_rc = 0; + if (!enable_testing) { - int test_rc = RUN_ALL_TESTS(); + } else { + test_rc = test.execute(); + } - finalizeComms(); return test_rc; } diff --git a/tests/staggered_gsmear_test_utils.h b/tests/staggered_gsmear_test_utils.h index 2cc7f1c2a8..8e4e2a9014 100644 --- a/tests/staggered_gsmear_test_utils.h +++ b/tests/staggered_gsmear_test_utils.h @@ -11,13 +11,13 @@ #include #include #include +#include -#include - -#include -#include +#include "misc.h" +#include "host_utils.h" #include "gauge_utils.h" #include "command_line_params.h" +#include "test.h" using namespace quda; @@ -152,7 +152,7 @@ struct StaggeredGSmearTestWrapper { // void init_ctest_once() { static bool has_been_called = false; - if (has_been_called) { errorQuda("This function is not supposed to be called twice.\n"); } + if (has_been_called) { errorQuda("This function is not supposed to be called twice"); } is_ctest = true; // Is being used in dslash_ctest. has_been_called = true; } @@ -160,7 +160,7 @@ struct StaggeredGSmearTestWrapper { // void end_ctest_once() { static bool has_been_called = false; - if (has_been_called) { errorQuda("This function is not supposed to be called twice.\n"); } + if (has_been_called) { errorQuda("This function is not supposed to be called twice"); } has_been_called = true; } @@ -209,7 +209,6 @@ struct StaggeredGSmearTestWrapper { // void init(int argc, char **argv) { - setVerbosity(verbosity); inv_param.split_grid[0] = grid_partition[0]; inv_param.split_grid[1] = grid_partition[1]; inv_param.split_grid[2] = grid_partition[2]; diff --git a/tests/staggered_invert_test.cpp b/tests/staggered_invert_test.cpp index 1f5c7c726e..f5d2f31505 100644 --- a/tests/staggered_invert_test.cpp +++ b/tests/staggered_invert_test.cpp @@ -31,6 +31,14 @@ bool use_multi_src = false; // print instructions on how to run the old tests bool print_legacy_info = false; +QudaPrecision last_prec = QUDA_INVALID_PRECISION; + +GaugeField cpuInQDP = {}; +GaugeField cpuFatQDP = 
{}; +GaugeField cpuLongQDP = {}; +GaugeField cpuFatMILC = {}; +GaugeField cpuLongMILC = {}; + // if --enable-testing true is passed, we run the tests defined in here #include @@ -131,11 +139,6 @@ void display_legacy_info() "--test 6 -> --solve-type direct-pc --solution-type mat-pc --inv-type cg --matpc odd-odd --multishift 8\n"); } -GaugeField cpuFatQDP = {}; -GaugeField cpuLongQDP = {}; -GaugeField cpuFatMILC = {}; -GaugeField cpuLongMILC = {}; - void init() { // Set QUDA internal parameters @@ -202,7 +205,7 @@ void init() cpuParam.order = QUDA_QDP_GAUGE_ORDER; cpuParam.ghostExchange = QUDA_GHOST_EXCHANGE_PAD; cpuParam.create = QUDA_NULL_FIELD_CREATE; - GaugeField cpuIn = GaugeField(cpuParam); + cpuInQDP = GaugeField(cpuParam); cpuFatQDP = GaugeField(cpuParam); cpuParam.order = QUDA_MILC_GAUGE_ORDER; cpuFatMILC = GaugeField(cpuParam); @@ -214,7 +217,7 @@ void init() cpuParam.order = QUDA_MILC_GAUGE_ORDER; cpuLongMILC = GaugeField(cpuParam); - void *qdp_inlink[4] = {cpuIn.data(0), cpuIn.data(1), cpuIn.data(2), cpuIn.data(3)}; + void *qdp_inlink[4] = {cpuInQDP.data(0), cpuInQDP.data(1), cpuInQDP.data(2), cpuInQDP.data(3)}; void *qdp_fatlink[4] = {cpuFatQDP.data(0), cpuFatQDP.data(1), cpuFatQDP.data(2), cpuFatQDP.data(3)}; void *qdp_longlink[4] = {cpuLongQDP.data(0), cpuLongQDP.data(1), cpuLongQDP.data(2), cpuLongQDP.data(3)}; constructStaggeredHostGaugeField(qdp_inlink, qdp_longlink, qdp_fatlink, gauge_param, 0, nullptr, true); @@ -254,20 +257,22 @@ void init() std::vector> solve(test_t param) { - inv_param.inv_type = ::testing::get<0>(param); - inv_param.solution_type = ::testing::get<1>(param); - inv_param.solve_type = ::testing::get<2>(param); - inv_param.cuda_prec_sloppy = ::testing::get<3>(param); - multishift = ::testing::get<4>(param); - inv_param.solution_accumulator_pipeline = ::testing::get<5>(param); + inv_param.cuda_prec = ::testing::get<0>(param); + inv_param.cuda_prec_sloppy = ::testing::get<1>(param); + inv_param.cuda_prec_refinement_sloppy = 
::testing::get<1>(param); + inv_param.inv_type = ::testing::get<2>(param); + inv_param.solution_type = ::testing::get<3>(param); + inv_param.solve_type = ::testing::get<4>(param); + multishift = ::testing::get<5>(param); + inv_param.solution_accumulator_pipeline = ::testing::get<6>(param); // schwarz parameters - auto schwarz_param = ::testing::get<6>(param); + auto schwarz_param = ::testing::get<7>(param); inv_param.schwarz_type = ::testing::get<0>(schwarz_param); inv_param.inv_type_precondition = ::testing::get<1>(schwarz_param); inv_param.cuda_prec_precondition = ::testing::get<2>(schwarz_param); - inv_param.residual_type = ::testing::get<7>(param); + inv_param.residual_type = ::testing::get<8>(param); // reset lambda_max if we're doing a testing loop to ensure correct lambma_max if (enable_testing) inv_param.ca_lambda_max = -1.0; @@ -463,6 +468,7 @@ std::vector> solve(test_t param) void cleanup() { + cpuInQDP = {}; cpuFatQDP = {}; cpuLongQDP = {}; cpuFatMILC = {}; @@ -569,7 +575,7 @@ int main(int argc, char **argv) result = RUN_ALL_TESTS(); } else { for (int rep = 0; rep < nrepeat; rep++) - solve(test_t {inv_type, solution_type, solve_type, prec_sloppy, multishift, solution_accumulator_pipeline, + solve(test_t {prec, prec_sloppy, inv_type, solution_type, solve_type, multishift, solution_accumulator_pipeline, schwarz_t {precon_schwarz_type, inv_multigrid ? 
QUDA_MG_INVERTER : precon_type, prec_precondition}, inv_param.residual_type}); } diff --git a/tests/staggered_invert_test_gtest.hpp b/tests/staggered_invert_test_gtest.hpp index af150fea86..19371b4944 100644 --- a/tests/staggered_invert_test_gtest.hpp +++ b/tests/staggered_invert_test_gtest.hpp @@ -4,30 +4,23 @@ // tuple containing parameters for Schwarz solver using schwarz_t = ::testing::tuple; -using test_t - = ::testing::tuple; - -class StaggeredInvertTest : public ::testing::TestWithParam -{ -protected: - test_t param; - -public: - StaggeredInvertTest() : param(GetParam()) { } -}; +using test_t = ::testing::tuple; bool skip_test(test_t param) { - auto inverter_type = ::testing::get<0>(param); - auto solution_type = ::testing::get<1>(param); - auto solve_type = ::testing::get<2>(param); - auto prec_sloppy = ::testing::get<3>(param); - auto multishift = ::testing::get<4>(param); - auto solution_accumulator_pipeline = ::testing::get<5>(param); - auto schwarz_param = ::testing::get<6>(param); + auto prec = ::testing::get<0>(param); + auto prec_sloppy = ::testing::get<1>(param); + auto inverter_type = ::testing::get<2>(param); + auto solution_type = ::testing::get<3>(param); + auto solve_type = ::testing::get<4>(param); + auto multishift = ::testing::get<5>(param); + auto solution_accumulator_pipeline = ::testing::get<6>(param); + auto schwarz_param = ::testing::get<7>(param); auto prec_precondition = ::testing::get<2>(schwarz_param); if (prec < prec_sloppy) return true; // outer precision >= sloppy precision + if (!(QUDA_PRECISION & prec)) return true; // precision not enabled so skip it if (!(QUDA_PRECISION & prec_sloppy)) return true; // precision not enabled so skip it if (!(QUDA_PRECISION & prec_precondition) && prec_precondition != QUDA_INVALID_PRECISION) return true; // precision not enabled so skip it @@ -70,21 +63,73 @@ bool skip_test(test_t param) return false; } +class StaggeredInvertTest : public ::testing::TestWithParam +{ +protected: + test_t param; + 
+public: + StaggeredInvertTest() : param(GetParam()) { } + + virtual void SetUp() + { + if (skip_test(GetParam())) GTEST_SKIP(); + + // check if outer precision has changed and update if it has + if (::testing::get<0>(param) != last_prec) { + if (last_prec != QUDA_INVALID_PRECISION) freeGaugeQuda(); + + // Compute the plaquette. Routine is aware that the gauge fields already have the phases on them. + // This needs to be called before `loadFatLongGaugeQuda` because this routine also loads the + // gauge fields with different parameters. + void *qdp_inlink[4] = {cpuInQDP.data(0), cpuInQDP.data(1), cpuInQDP.data(2), cpuInQDP.data(3)}; + + // Load the gauge field to the device + gauge_param.cuda_prec = ::testing::get<0>(param); + gauge_param.cuda_prec_sloppy = ::testing::get<0>(param); + gauge_param.cuda_prec_precondition = ::testing::get<0>(param); + gauge_param.cuda_prec_refinement_sloppy = ::testing::get<0>(param); + gauge_param.cuda_prec_eigensolver = ::testing::get<0>(param); + + double plaq[3]; + computeStaggeredPlaquetteQDPOrder(qdp_inlink, plaq, gauge_param, dslash_type); + printfQuda("Computed plaquette is %e (spatial = %e, temporal = %e)\n", plaq[0], plaq[1], plaq[2]); + + if (dslash_type == QUDA_ASQTAD_DSLASH) { + // Compute fat link plaquette + void *qdp_fatlink[4] = {cpuFatQDP.data(0), cpuFatQDP.data(1), cpuFatQDP.data(2), cpuFatQDP.data(3)}; + computeStaggeredPlaquetteQDPOrder(qdp_fatlink, plaq, gauge_param, dslash_type); + printfQuda("Computed fat link plaquette is %e (spatial = %e, temporal = %e)\n", plaq[0], plaq[1], plaq[2]); + } + + freeGaugeQuda(); + + loadFatLongGaugeQuda(cpuFatMILC.data(), cpuLongMILC.data(), gauge_param); + + last_prec = ::testing::get<0>(param); + } + } +}; + std::vector> solve(test_t param); TEST_P(StaggeredInvertTest, verify) { if (skip_test(GetParam())) GTEST_SKIP(); + // Loosen tolerances to keep iterations to solution in check + tol = ::testing::get<0>(param) == QUDA_DOUBLE_PRECISION ? 
1e-6 : 1e-5; + tol_hq = tol; + inv_param.tol = 0.0; inv_param.tol_hq = 0.0; - auto res_t = ::testing::get<7>(GetParam()); + auto res_t = ::testing::get<8>(GetParam()); if (res_t & QUDA_L2_RELATIVE_RESIDUAL) inv_param.tol = tol; if (res_t & QUDA_HEAVY_QUARK_RESIDUAL) inv_param.tol_hq = tol_hq; - auto inverter_type = ::testing::get<0>(param); - auto solution_type = ::testing::get<1>(param); - auto solve_type = ::testing::get<2>(param); + auto inverter_type = ::testing::get<2>(param); + auto solution_type = ::testing::get<3>(param); + auto solve_type = ::testing::get<4>(param); // Make a local copy of "tol" for modification in place auto verify_tol = tol; @@ -126,21 +171,22 @@ TEST_P(StaggeredInvertTest, verify) std::string gettestname(::testing::TestParamInfo param) { std::string name; - name += get_solver_str(::testing::get<0>(param.param)) + std::string("_"); - name += get_solution_str(::testing::get<1>(param.param)) + std::string("_"); - name += get_solve_str(::testing::get<2>(param.param)) + std::string("_"); - name += get_prec_str(::testing::get<3>(param.param)); - if (::testing::get<4>(param.param) > 1) - name += std::string("_shift") + std::to_string(::testing::get<4>(param.param)); + name += get_prec_str(::testing::get<0>(param.param)) + std::string("_"); + name += get_prec_str(::testing::get<1>(param.param)) + std::string("_"); + name += get_solver_str(::testing::get<2>(param.param)) + std::string("_"); + name += get_solution_str(::testing::get<3>(param.param)) + std::string("_"); + name += get_solve_str(::testing::get<4>(param.param)); if (::testing::get<5>(param.param) > 1) - name += std::string("_solution_accumulator_pipeline") + std::to_string(::testing::get<5>(param.param)); - auto &schwarz_param = ::testing::get<6>(param.param); + name += std::string("_shift") + std::to_string(::testing::get<5>(param.param)); + if (::testing::get<6>(param.param) > 1) + name += std::string("_solution_accumulator_pipeline") + 
std::to_string(::testing::get<6>(param.param)); + auto &schwarz_param = ::testing::get<7>(param.param); if (::testing::get<0>(schwarz_param) != QUDA_INVALID_SCHWARZ) { name += std::string("_") + get_schwarz_str(::testing::get<0>(schwarz_param)); name += std::string("_") + get_solver_str(::testing::get<1>(schwarz_param)); name += std::string("_") + get_prec_str(::testing::get<2>(schwarz_param)); } - auto res_t = ::testing::get<7>(param.param); + auto res_t = ::testing::get<8>(param.param); if (res_t & QUDA_L2_RELATIVE_RESIDUAL) name += std::string("_l2"); if (res_t & QUDA_HEAVY_QUARK_RESIDUAL) name += std::string("_heavy_quark"); return name; @@ -159,6 +205,8 @@ auto direct_solvers = Values(QUDA_CGNE_INVERTER, QUDA_CGNR_INVERTER, QUDA_CA_CGN QUDA_CG3NE_INVERTER, QUDA_CG3NR_INVERTER, QUDA_GCR_INVERTER, QUDA_CA_GCR_INVERTER, QUDA_BICGSTAB_INVERTER, QUDA_BICGSTABL_INVERTER, QUDA_MR_INVERTER); +auto precisions = Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION); + auto sloppy_precisions = Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION); @@ -173,35 +221,36 @@ auto no_heavy_quark = Values(QUDA_L2_RELATIVE_RESIDUAL); // preconditioned solves INSTANTIATE_TEST_SUITE_P(EvenOdd, StaggeredInvertTest, - Combine(staggered_pc_solvers, Values(QUDA_MATPC_SOLUTION, QUDA_MAT_SOLUTION), - Values(QUDA_DIRECT_PC_SOLVE), sloppy_precisions, Values(1), - solution_accumulator_pipelines, no_schwarz, no_heavy_quark), + Combine(precisions, sloppy_precisions, staggered_pc_solvers, + Values(QUDA_MATPC_SOLUTION, QUDA_MAT_SOLUTION), Values(QUDA_DIRECT_PC_SOLVE), + Values(1), solution_accumulator_pipelines, no_schwarz, no_heavy_quark), gettestname); // full system normal solve INSTANTIATE_TEST_SUITE_P(NormalFull, StaggeredInvertTest, - Combine(normal_solvers, Values(QUDA_MATDAG_MAT_SOLUTION, QUDA_MAT_SOLUTION), - Values(QUDA_NORMOP_SOLVE), sloppy_precisions, Values(1), - solution_accumulator_pipelines, no_schwarz, no_heavy_quark), + 
Combine(precisions, sloppy_precisions, normal_solvers, + Values(QUDA_MATDAG_MAT_SOLUTION, QUDA_MAT_SOLUTION), Values(QUDA_NORMOP_SOLVE), + Values(1), solution_accumulator_pipelines, no_schwarz, no_heavy_quark), gettestname); // full system direct solve INSTANTIATE_TEST_SUITE_P(Full, StaggeredInvertTest, - Combine(direct_solvers, Values(QUDA_MAT_SOLUTION), Values(QUDA_DIRECT_SOLVE), sloppy_precisions, - Values(1), solution_accumulator_pipelines, no_schwarz, no_heavy_quark), + Combine(precisions, sloppy_precisions, direct_solvers, Values(QUDA_MAT_SOLUTION), + Values(QUDA_DIRECT_SOLVE), Values(1), solution_accumulator_pipelines, no_schwarz, + no_heavy_quark), gettestname); // preconditioned multi-shift solves INSTANTIATE_TEST_SUITE_P(MultiShiftEvenOdd, StaggeredInvertTest, - Combine(Values(QUDA_CG_INVERTER), Values(QUDA_MATPC_SOLUTION), Values(QUDA_DIRECT_PC_SOLVE), - sloppy_precisions, Values(10), solution_accumulator_pipelines, no_schwarz, + Combine(precisions, sloppy_precisions, Values(QUDA_CG_INVERTER), Values(QUDA_MATPC_SOLUTION), + Values(QUDA_DIRECT_PC_SOLVE), Values(10), solution_accumulator_pipelines, no_schwarz, no_heavy_quark), gettestname); // Heavy-Quark preconditioned solves INSTANTIATE_TEST_SUITE_P(HeavyQuarkEvenOdd, StaggeredInvertTest, - Combine(Values(QUDA_CG_INVERTER), Values(QUDA_MATPC_SOLUTION), Values(QUDA_DIRECT_PC_SOLVE), - sloppy_precisions, Values(1), solution_accumulator_pipelines, no_schwarz, + Combine(precisions, sloppy_precisions, Values(QUDA_CG_INVERTER), Values(QUDA_MATPC_SOLUTION), + Values(QUDA_DIRECT_PC_SOLVE), Values(1), solution_accumulator_pipelines, no_schwarz, Values(QUDA_L2_RELATIVE_RESIDUAL | QUDA_HEAVY_QUARK_RESIDUAL, QUDA_HEAVY_QUARK_RESIDUAL)), gettestname); diff --git a/tests/unitarize_link_test.cpp b/tests/unitarize_link_test.cpp index 6a175204f8..1341405122 100644 --- a/tests/unitarize_link_test.cpp +++ b/tests/unitarize_link_test.cpp @@ -3,24 +3,19 @@ #include #include -#include "quda.h" -#include "timer.h" -#include 
"gauge_field.h" +#include +#include +#include +#include +#include "util_quda.h" +#include + #include "host_utils.h" -#include +#include "command_line_params.h" #include "misc.h" #include "test.h" -#include "util_quda.h" -#include "llfat_quda.h" -#include #include "ks_improved_force.h" - -#ifdef MULTI_GPU #include "comm_quda.h" -#endif - -// google test frame work -#include #define TDIFF(a, b) (b.tv_sec - a.tv_sec + 0.000001 * (b.tv_usec - a.tv_usec)) @@ -33,34 +28,23 @@ static double max_allowed_error = 1e-11; static QudaGaugeFieldOrder gauge_order = QUDA_MILC_GAUGE_ORDER; -quda::GaugeField *cpuFatLink, *cpuULink, *cudaResult; -quda::GaugeField *cudaFatLink, *cudaULink; - const double unittol = (prec == QUDA_DOUBLE_PRECISION) ? 1e-10 : 1e-6; -TEST(unitarization, verify) -{ - unitarizeLinksCPU(*cpuULink, *cpuFatLink); - cudaResult->copy(*cudaULink); - - int res = compare_floats(cudaResult->data(), cpuULink->data(), 4 * cudaResult->Volume() * gauge_site_size, unittol, - cpu_prec); +using test_t = ::testing::tuple; -#ifdef MULTI_GPU - quda::comm_allreduce_int(res); - res /= quda::comm_size(); -#endif +class UnitarizeTest : public ::testing::TestWithParam +{ +protected: + QudaPrecision precision; - ASSERT_EQ(res, 1) << "CPU and CUDA implementations do not agree"; -} +public: + UnitarizeTest() : precision(::testing::get<0>(GetParam())) { } +}; -static int unitarize_link_test(int &test_rc) +void unitarize(QudaPrecision prec) { - setVerbosity(verbosity); QudaGaugeParam qudaGaugeParam = newQudaGaugeParam(); - qudaGaugeParam.anisotropy = 1.0; - qudaGaugeParam.X[0] = xdim; qudaGaugeParam.X[1] = ydim; qudaGaugeParam.X[2] = zdim; @@ -70,6 +54,8 @@ static int unitarize_link_test(int &test_rc) qudaGaugeParam.type = QUDA_WILSON_LINKS; + qudaGaugeParam.anisotropy = 1.0; + qudaGaugeParam.t_boundary = QUDA_PERIODIC_T; qudaGaugeParam.anisotropy = 1.0; qudaGaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO; @@ -125,21 +111,21 @@ static int unitarize_link_test(int &test_rc) gParam.create = 
QUDA_REFERENCE_FIELD_CREATE; gParam.gauge = fatlink; gParam.location = QUDA_CPU_FIELD_LOCATION; - cpuFatLink = new quda::GaugeField(gParam); + auto cpuFatLink = quda::GaugeField(gParam); gParam.create = QUDA_ZERO_FIELD_CREATE; - cpuULink = new quda::GaugeField(gParam); + auto cpuULink = quda::GaugeField(gParam); gParam.create = QUDA_ZERO_FIELD_CREATE; - cudaResult = new quda::GaugeField(gParam); + auto cudaResult = quda::GaugeField(gParam); gParam.pad = 0; gParam.create = QUDA_NULL_FIELD_CREATE; gParam.reconstruct = QUDA_RECONSTRUCT_NO; gParam.setPrecision(prec, true); gParam.location = QUDA_CUDA_FIELD_LOCATION; - cudaFatLink = new quda::GaugeField(gParam); - cudaULink = new quda::GaugeField(gParam); + auto cudaFatLink = quda::GaugeField(gParam); + auto cudaULink = quda::GaugeField(gParam); { // create fat links double act_path_coeff[6]; @@ -152,7 +138,7 @@ static int unitarize_link_test(int &test_rc) computeKSLinkQuda(fatlink, NULL, NULL, inlink, act_path_coeff, &qudaGaugeParam); - cudaFatLink->copy(*cpuFatLink); + cudaFatLink.copy(cpuFatLink); } quda::setUnitarizeLinksConstants(unitarize_eps, max_allowed_error, reunit_allow_svd, reunit_svd_only, svd_rel_error, @@ -165,19 +151,22 @@ static int unitarize_link_test(int &test_rc) struct timeval t0, t1; gettimeofday(&t0, NULL); - unitarizeLinks(*cudaULink, *cudaFatLink, num_failures_d); + unitarizeLinks(cudaULink, cudaFatLink, num_failures_d); gettimeofday(&t1, NULL); if (verify_results) { - test_rc = RUN_ALL_TESTS(); - if (test_rc != 0) warningQuda("Tests failed"); + unitarizeLinksCPU(cpuULink, cpuFatLink); + cudaResult.copy(cudaULink); + + int test_rc + = compare_floats(cudaResult.data(), cpuULink.data(), 4 * cudaResult.Volume() * gauge_site_size, unittol, cpu_prec); + + quda::comm_allreduce_int(test_rc); + test_rc /= quda::comm_size(); + + ASSERT_EQ(test_rc, 1) << "CPU and CUDA implementations do not agree"; } - delete cudaResult; - delete cpuULink; - delete cpuFatLink; - delete cudaFatLink; - delete cudaULink; for 
(int dir = 0; dir < 4; ++dir) host_free(sitelink[dir]); host_free(fatlink); @@ -191,69 +180,61 @@ static int unitarize_link_test(int &test_rc) #endif printfQuda("Unitarization time: %g ms\n", TDIFF(t0, t1) * 1000); - return num_failures; + + quda::comm_allreduce_int(num_failures); + printfQuda("Number of failures = %d\n", num_failures); + ASSERT_EQ(num_failures, 0); + if (num_failures > 0) { + printfQuda("Failure rate = %lf\n", num_failures / (4.0 * V * quda::comm_size())); + printfQuda("You may want to increase the error tolerance or vary the unitarization parameters\n"); + } else { + printfQuda("Unitarization successfull!\n"); + } } -static void display_test_info() +TEST_P(UnitarizeTest, verify) { - printfQuda("running the following test:\n"); + prec = ::testing::get<0>(GetParam()); + if (!quda::is_enabled(prec)) GTEST_SKIP(); + unitarize(prec); +} - printfQuda("link_precision link_reconstruct space_dimension T_dimension algorithm " - "max allowed error deviation tolerance\n"); - printfQuda("%8s %s %d/%d/%d/ %d %s %g " - " %g\n", - get_prec_str(prec), get_recon_str(link_recon), xdim, ydim, zdim, tdim, - get_unitarization_str(reunit_svd_only), max_allowed_error, unittol); +auto test_str + = [](testing::TestParamInfo param) { return std::string(get_prec_str(::testing::get<0>(param.param))); }; + +INSTANTIATE_TEST_SUITE_P(, UnitarizeTest, ::testing::Values(QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION), test_str); + +struct unitarize_test : public quda_test { + void display_info() const override + { + printfQuda( + "link_precision link_reconstruct space_dimension T_dimension algorithm " + "max allowed error deviation tolerance\n"); + printfQuda( + "%8s %s %d/%d/%d/ %d %s %g " + " %g\n", + get_prec_str(prec), get_recon_str(link_recon), xdim, ydim, zdim, tdim, get_unitarization_str(reunit_svd_only), + max_allowed_error, unittol); + } -#ifdef MULTI_GPU - printfQuda("Grid partition info: X Y Z T\n"); - printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), 
dimPartitioned(2), - dimPartitioned(3)); -#endif -} + unitarize_test(int argc, char **argv) : quda_test("unitarize_test", argc, argv) { } +}; int main(int argc, char **argv) { - // initalize google test, includes command line options - ::testing::InitGoogleTest(&argc, argv); - int test_rc = 0; + unitarize_test test(argc, argv); // default to 18 reconstruct, 8^3 x 8 link_recon = QUDA_RECONSTRUCT_NO; xdim = ydim = zdim = tdim = 8; - auto app = make_app(); - try { - app->parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app->exit(e); - } - - initComms(argc, argv, gridsize_from_cmdline); - initQuda(device_ordinal); + test.init(); - // Ensure gtest prints only from rank 0 - ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); - if (quda::comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); } - - display_test_info(); - int num_failures = unitarize_link_test(test_rc); - int num_procs = 1; -#ifdef MULTI_GPU - quda::comm_allreduce_int(num_failures); - num_procs = quda::comm_size(); -#endif - - printfQuda("Number of failures = %d\n", num_failures); - if (num_failures > 0) { - printfQuda("Failure rate = %lf\n", num_failures / (4.0 * V * num_procs)); - printfQuda("You may want to increase the error tolerance or vary the unitarization parameters\n"); + int test_rc = 0; + if (!enable_testing) { + unitarize(prec); } else { - printfQuda("Unitarization successfull!\n"); + test_rc = test.execute(); } - - endQuda(); - finalizeComms(); - return test_rc; } diff --git a/tests/utils/force_utils.hpp b/tests/utils/force_utils.hpp index c8e06e3919..8a42907809 100644 --- a/tests/utils/force_utils.hpp +++ b/tests/utils/force_utils.hpp @@ -30,13 +30,13 @@ template struct anti_hermitmat { template su3_matrix *get_su3_matrix(quda::GaugeField &p, int idx, int dir) { - auto data = static_cast *const>(p.data(dir)); + auto data = static_cast *>(p.data(dir)); return data + idx; } template const su3_matrix 
*get_su3_matrix(const quda::GaugeField &p, int idx, int dir) { - auto data = static_cast *const>(p.data(dir)); + auto data = static_cast *>(p.data(dir)); return data + idx; }