From d88b8f1fb485b48fa68e1634315381ecfd2a7073 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 26 Jun 2025 13:47:45 -0600 Subject: [PATCH 01/37] Add some additional timers around decomp level activity --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 2 ++ src/main/clm_initializeMod.F90 | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index b7ef7216d9..af1426bf9b 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -642,8 +642,10 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return call ESMF_GridCompGet(gcomp, vm=vm, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_startf ('lc_lnd_set_decomp_and_domain_from_readmesh') call lnd_set_decomp_and_domain_from_readmesh(driver='cmeps', vm=vm, & meshfile_lnd=model_meshfile, meshfile_mask=meshfile_mask, mesh_ctsm=mesh, ni=ni, nj=nj, rc=rc) + call t_stopf ('lc_lnd_set_decomp_and_domain_from_readmesh') if (ChkErr(rc,__LINE__,u_FILE_u)) return end if diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 8c0b50230b..da8185be31 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -279,7 +279,9 @@ subroutine initialize2(ni,nj, currtime) end if ! Determine decomposition of subgrid scale landunits, columns, patches + call t_startf('clm_decompInit_clumps') call decompInit_clumps(ni, nj, glc_behavior) + call t_stopf('clm_decompInit_clumps') ! *** Get ALL processor bounds - for gridcells, landunit, columns and patches *** call get_proc_bounds(bounds_proc) @@ -304,7 +306,9 @@ subroutine initialize2(ni,nj, currtime) !$OMP END PARALLEL DO ! Set global seg maps for gridcells, landlunits, columns and patches + call t_startf('clm_decompInit_glcp') call decompInit_glcp(ni, nj, glc_behavior) + call t_stopf('clm_decompInit_glcp') if (use_hillslope) then ! 
Initialize hillslope properties From 6cf4b8e5e40a340e21a1f68898a1041dad8b8f6b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 26 Jun 2025 15:36:14 -0600 Subject: [PATCH 02/37] Start adding control for decomp_init self tests and add ability for self tests to exit after running in initialization --- src/main/clm_varctl.F90 | 6 ++++++ src/self_tests/SelfTestDriver.F90 | 26 ++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/main/clm_varctl.F90 b/src/main/clm_varctl.F90 index 41978ae695..b6037c7795 100644 --- a/src/main/clm_varctl.F90 +++ b/src/main/clm_varctl.F90 @@ -52,6 +52,12 @@ module clm_varctl ! true => run tests of ncdio_pio logical, public :: for_testing_run_ncdiopio_tests = .false. + ! true => run tests of decompInit + logical, public :: for_testing_run_decomp_init_tests = .false. + + ! true => exit after the self-tests run + logical, public :: for_testing_after_self_tests_run = .false. + ! true => allocate memory for and use a second grain pool. This is meant only for ! software testing of infrastructure to support the AgSys crop model integration. This ! option can be dropped once AgSys is integrated and we have tests of it. diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index d109a27827..28994bc20b 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -6,10 +6,6 @@ module SelfTestDriver ! ! See the README file in this directory for a high-level overview of these self-tests. - use clm_varctl, only : for_testing_run_ncdiopio_tests - use decompMod, only : bounds_type - use TestNcdioPio, only : test_ncdio_pio - implicit none private save @@ -32,17 +28,39 @@ subroutine self_test_driver(bounds) ! This subroutine should be called all the time, but each set of self tests is only ! run if the appropriate flag is set. ! + ! 
!USES: + use clm_varctl, only : for_testing_run_ncdiopio_tests, for_testing_run_decomp_init_tests + use clm_varctl, only : for_testing_exit_after_self_tests, iulog + use decompMod, only : bounds_type + use TestNcdioPio, only : test_ncdio_pio + use ESMF, only : ESMF_LogWrite, ESMF_LOGMSG_INFO, ESMF_Finalize + use shr_sys_mod, only : shr_sys_flush ! !ARGUMENTS: type(bounds_type), intent(in) :: bounds ! ! !LOCAL VARIABLES: character(len=*), parameter :: subname = 'self_test_driver' + integer :: ntests = 0 !----------------------------------------------------------------------- if (for_testing_run_ncdiopio_tests) then + ntests = ntests + 1 call test_ncdio_pio(bounds) end if + if (for_testing_run_decomp_init_tests) then + ntests = ntests + 1 + end if + if (for_testing_exit_after_self_tests) then + if ( ntests == 0 )then + write(iulog,*) 'WARNING: You are exiting after self tests were run -- but no self tests were run.' + else + write(iulog,*) 'Exiting after running ', ntests, ' self tests.' + end if + call shr_sys_flush(iulog) + call ESMF_LogWrite(' exiting after running self tests', ESMF_LOGMSG_INFO) + call ESMF_Finalize() + end if end subroutine self_test_driver From a27de36fe7f0f1cb5b14515feb41f0ea55449d9d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 27 Jun 2025 16:54:13 -0600 Subject: [PATCH 03/37] Add namelist controls for self testing --- .../namelist_definition_ctsm.xml | 11 +++++++++++ src/cpl/nuopc/lnd_comp_nuopc.F90 | 18 ++++++++++++++++++ src/main/clm_varctl.F90 | 2 +- src/main/controlMod.F90 | 6 +++++- src/self_tests/SelfTestDriver.F90 | 19 +++++++++++-------- 5 files changed, 46 insertions(+), 10 deletions(-) diff --git a/bld/namelist_files/namelist_definition_ctsm.xml b/bld/namelist_files/namelist_definition_ctsm.xml index 820975655d..e116d51b21 100644 --- a/bld/namelist_files/namelist_definition_ctsm.xml +++ b/bld/namelist_files/namelist_definition_ctsm.xml @@ -1242,12 +1242,23 @@ Whether to use subgrid fluxes for snow Whether snow on the 
vegetation canopy affects the radiation/albedo calculations + +Whether to exit early after the initialization self tests are run. This is typically only used in automated tests. + + Whether to run some tests of ncdio_pio as part of the model run. This is typically only used in automated tests. + +Whether to run some tests of decompInit (to get the gridcell to MPI task decomposition) as part of the model run. This is +typically only used in automated tests. + + If true, allocate memory for and use a second crop grain pool. This is diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index af1426bf9b..0287e1fce2 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -39,6 +39,7 @@ module lnd_comp_nuopc use clm_varctl , only : single_column, clm_varctl_set, iulog use clm_varctl , only : nsrStartup, nsrContinue, nsrBranch use clm_varctl , only : FL => fname_len + use clm_varctl , only : for_testing_exit_after_self_tests use clm_time_manager , only : set_timemgr_init, advance_timestep use clm_time_manager , only : update_rad_dtime use clm_time_manager , only : get_nstep, get_step_size @@ -492,6 +493,12 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) else single_column = .false. end if + if ( for_testing_exit_after_self_tests) then + ! ******************* + ! *** RETURN HERE *** + ! ******************* + RETURN + end if !---------------------------------------------------------------------------- ! Reset shr logging to my log file @@ -662,6 +669,9 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return call initialize2(ni, nj, currtime) + if (for_testing_exit_after_self_tests) then + RETURN + end if !-------------------------------- ! Create land export state @@ -771,6 +781,9 @@ subroutine ModelAdvance(gcomp, rc) if (single_column .and. .not. 
scol_valid) then RETURN end if + if (for_testing_exit_after_self_tests) then + RETURN + end if !$ call omp_set_num_threads(nthrds) @@ -1002,6 +1015,7 @@ subroutine ModelSetRunClock(gcomp, rc) rc = ESMF_SUCCESS call ESMF_LogWrite(subname//' called', ESMF_LOGMSG_INFO) if (.not. scol_valid) return + if (for_testing_exit_after_self_tests) return ! query the Component for its clocks call NUOPC_ModelGet(gcomp, driverClock=dclock, modelClock=mclock, rc=rc) @@ -1285,6 +1299,7 @@ subroutine clm_orbital_update(clock, logunit, mastertask, eccen, obliqr, lambm0 end subroutine clm_orbital_update subroutine CheckImport(gcomp, rc) + use clm_varctl, only : for_testing_exit_after_self_tests type(ESMF_GridComp) :: gcomp integer, intent(out) :: rc character(len=*) , parameter :: subname = "("//__FILE__//":CheckImport)" @@ -1313,6 +1328,9 @@ subroutine CheckImport(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if + if (for_testing_exit_after_self_tests) then + RETURN + end if ! The remander of this should be equivalent to the NUOPC internal routine ! from NUOPC_ModeBase.F90 diff --git a/src/main/clm_varctl.F90 b/src/main/clm_varctl.F90 index b6037c7795..e51ce4d33c 100644 --- a/src/main/clm_varctl.F90 +++ b/src/main/clm_varctl.F90 @@ -56,7 +56,7 @@ module clm_varctl logical, public :: for_testing_run_decomp_init_tests = .false. ! true => exit after the self-tests run - logical, public :: for_testing_after_self_tests_run = .false. + logical, public :: for_testing_exit_after_self_tests = .false. ! true => allocate memory for and use a second grain pool. This is meant only for ! software testing of infrastructure to support the AgSys crop model integration. 
This diff --git a/src/main/controlMod.F90 b/src/main/controlMod.F90 index 6d363a9a6e..51bc9ce5c0 100644 --- a/src/main/controlMod.F90 +++ b/src/main/controlMod.F90 @@ -211,6 +211,7 @@ subroutine control_init(dtime) snow_thermal_cond_lake_method, snow_cover_fraction_method, & irrigate, run_zero_weight_urban, all_active, & crop_fsat_equals_zero, for_testing_run_ncdiopio_tests, & + for_testing_run_decomp_init_tests, for_testing_exit_after_self_tests, & for_testing_use_second_grain_pool, for_testing_use_repr_structure_pool, & for_testing_no_crop_seed_replenishment, & z0param_method, use_z0m_snowmelt @@ -764,8 +765,11 @@ subroutine control_spmd() ! Crop saturated excess runoff call mpi_bcast(crop_fsat_equals_zero, 1, MPI_LOGICAL, 0, mpicom, ier) - ! Whether to run tests of ncdio_pio + ! Whether to run self tests call mpi_bcast(for_testing_run_ncdiopio_tests, 1, MPI_LOGICAL, 0, mpicom, ier) + call mpi_bcast(for_testing_run_decomp_init_tests, 1, MPI_LOGICAL, 0, mpicom, ier) + + call mpi_bcast(for_testing_exit_after_self_tests, 1, MPI_LOGICAL, 0, mpicom, ier) ! Various flags used for testing infrastructure for having multiple crop reproductive pools call mpi_bcast(for_testing_use_second_grain_pool, 1, MPI_LOGICAL, 0, mpicom, ier) diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index 28994bc20b..dd44a185df 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -35,6 +35,7 @@ subroutine self_test_driver(bounds) use TestNcdioPio, only : test_ncdio_pio use ESMF, only : ESMF_LogWrite, ESMF_LOGMSG_INFO, ESMF_Finalize use shr_sys_mod, only : shr_sys_flush + use spmdMod, only : masterproc ! !ARGUMENTS: type(bounds_type), intent(in) :: bounds ! @@ -52,14 +53,16 @@ subroutine self_test_driver(bounds) ntests = ntests + 1 end if if (for_testing_exit_after_self_tests) then - if ( ntests == 0 )then - write(iulog,*) 'WARNING: You are exiting after self tests were run -- but no self tests were run.' 
- else - write(iulog,*) 'Exiting after running ', ntests, ' self tests.' - end if - call shr_sys_flush(iulog) - call ESMF_LogWrite(' exiting after running self tests', ESMF_LOGMSG_INFO) - call ESMF_Finalize() + ! Print out some messaging if we are exiting after self tests. + if ( masterproc ) then + if ( ntests == 0 )then + write(iulog,*) 'WARNING: You are exiting after self tests were run -- but no self tests were run.' + else + write(iulog,*) 'Exiting after running ', ntests, ' self tests.' + end if + call shr_sys_flush(iulog) + call ESMF_LogWrite(' exiting after running self tests', ESMF_LOGMSG_INFO) + end if end if end subroutine self_test_driver From 2fd081b544cc00b2f426fd9d44b12b9d995bf240 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 2 Jul 2025 10:11:49 -0600 Subject: [PATCH 04/37] Changes to exit early when self test namelist option used for_testing_exit_after_self_tests, change the self tests testmod so that it's about initialization, this works with a compset with SATM, but hangs -- because nothing stops the run --- .../testmods_dirs/clm/run_self_tests/README | 9 +++++++-- .../clm/run_self_tests/shell_commands | 4 ++++ .../clm/run_self_tests/user_nl_clm | 6 ++++++ src/cpl/nuopc/lnd_comp_nuopc.F90 | 15 +++++++++++---- src/cpl/nuopc/lnd_import_export.F90 | 19 ++++++++++++++----- 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README index 938dffbe6f..985b2bfae4 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README @@ -1,5 +1,10 @@ -The purpose of this testmod directory is to trigger the runtime -self-tests. This runs a suite of unit/integration tests. +The purpose of this testmod directory is to trigger runtime +initialization self-tests. This runs a set of unit/integration tests +that apply at initialization.
We use cold start so that we can get through initialization faster, since how we initialize the model is unimportant for these self-tests. +We also exit as early as possible to minimize the time spent. + +There are other self_tests that need to be exercised in the model time stepping +and are done outside of these. diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index d426269206..9383f70de0 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -3,3 +3,7 @@ # We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to every time step ./xmlchange ROF_NCPL=48 + +# Restarts aren't allowed for these tests, and turn off CPL history +./xmlchange REST_OPTION="never" +./xmlchange HIST_OPTION="never" diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 6187386336..3edb8c7fc7 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1 +1,7 @@ for_testing_run_ncdiopio_tests = .true. +for_testing_run_decomp_init_tests = .true. +for_testing_exit_after_self_tests = .true. + +! Turn off history, restarts, and output +hist_empty_htapes = .true. +use_noio = .true. 
\ No newline at end of file diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 0287e1fce2..7245954b4d 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -81,6 +81,7 @@ module lnd_comp_nuopc logical :: glc_present logical :: rof_prognostic + logical :: atm_present logical :: atm_prognostic integer, parameter :: dbug = 0 character(*),parameter :: modName = "(lnd_comp_nuopc)" @@ -285,6 +286,11 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) else atm_prognostic = .true. end if + if (trim(atm_model) == 'satm') then + atm_present = .false. + else + atm_present = .true. + end if call NUOPC_CompAttributeGet(gcomp, name='GLC_model', value=glc_model, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return if (trim(glc_model) == 'sglc') then @@ -311,6 +317,9 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) write(iulog,'(a )')' rof component = '//trim(rof_model) write(iulog,'(a )')' glc component = '//trim(glc_model) write(iulog,'(a,L2)')' atm_prognostic = ',atm_prognostic + if (.not. 
atm_present) then + write(iulog,'(a,L2)')' atm_present = ',atm_present + end if write(iulog,'(a,L2)')' rof_prognostic = ',rof_prognostic write(iulog,'(a,L2)')' glc_present = ',glc_present if (glc_present) then @@ -329,7 +338,8 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) call control_setNL("lnd_in"//trim(inst_suffix)) - call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, atm_prognostic, rc) + call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & + atm_prognostic, atm_present, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return !---------------------------------------------------------------------------- @@ -669,9 +679,6 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return call initialize2(ni, nj, currtime) - if (for_testing_exit_after_self_tests) then - RETURN - end if !-------------------------------- ! Create land export state diff --git a/src/cpl/nuopc/lnd_import_export.F90 b/src/cpl/nuopc/lnd_import_export.F90 index 624590b9a6..b8a5efeb8d 100644 --- a/src/cpl/nuopc/lnd_import_export.F90 +++ b/src/cpl/nuopc/lnd_import_export.F90 @@ -156,7 +156,8 @@ module lnd_import_export contains !=============================================================================== - subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, atm_prognostic, rc) + subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & + atm_prognostic, atm_present, rc) use shr_carma_mod , only : shr_carma_readnl use shr_ndep_mod , only : shr_ndep_readnl @@ -173,6 +174,7 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r logical , intent(in) :: cism_evolve logical , intent(in) :: rof_prognostic logical , intent(in) :: atm_prognostic + logical , intent(in) :: atm_present integer , intent(out) :: rc ! 
local variables @@ -210,7 +212,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r ! Need to determine if there is no land for single column before the advertise call is done - if (atm_prognostic .or. force_send_to_atm) then + if (.not. atm_present)then + send_to_atm = .false. + else if (atm_prognostic .or. force_send_to_atm) then send_to_atm = .true. else send_to_atm = .false. @@ -253,12 +257,11 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r if (shr_megan_mechcomps_n .ne. megan_nflds) call shr_sys_abort('ERROR: megan field count mismatch') ! CARMA volumetric soil water from land - call shr_carma_readnl('drv_flds_in', carma_fields) ! export to atm call fldlist_add(fldsFrLnd_num, fldsFrlnd, trim(flds_scalar_name)) - call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') if (send_to_atm) then + call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_t ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_tref ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_qref ) @@ -339,6 +342,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, trim(flds_scalar_name)) + !!!!!!!!!!!!!!!!!!!!!!!!!!! new if section !!!!!!!!!!!!!!!!!!!!!!!!!! + if ( atm_present ) then + ! from atm call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_z ) call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_topo ) @@ -389,6 +395,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_co2diag) end if + end if ! atm_present + !!!!!!!!!!!!!!!!!!!!!!!!!!! new if section !!!!!!!!!!!!!!!!!!!!!!!!!! + if (rof_prognostic) then ! from river call fldlist_add(fldsToLnd_num, fldsToLnd, Flrr_flood ) @@ -773,6 +782,7 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to mediator ! 
----------------------- + if (send_to_atm) then call state_setexport_1d(exportState, Sl_lfrin, ldomain%frac(begg:), init_spval=.false., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -780,7 +790,6 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to atm ! ----------------------- - if (send_to_atm) then call state_setexport_1d(exportState, Sl_t , lnd2atm_inst%t_rad_grc(begg:), & init_spval=.true., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return From 6eaadd4b82fa1f79bd09b98712507f3e30a25203 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 10 Jul 2025 23:13:06 -0600 Subject: [PATCH 05/37] Bring in the share branch with the memory logger from John Dennis --- .gitmodules | 7 +++++-- share | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 0d39ebff4e..14e5163b73 100644 --- a/.gitmodules +++ b/.gitmodules @@ -99,8 +99,11 @@ fxDONOTUSEurl = https://github.com/ESCOMP/CDEPS.git [submodule "share"] path = share -url = https://github.com/ESCOMP/CESM_share -fxtag = share1.1.9 +#url = https://github.com/ESCOMP/CESM_share +url = https://github.com/ekluzek/CESM_share +#fxtag = share1.1.9 +#fxtag = add_jdennis_procstatus_module +fxtag = 6fe1530b604fa729a5d363c08272714f95ea6ea1 fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod test" to ensure personal forks aren't committed fxDONOTUSEurl = https://github.com/ESCOMP/CESM_share diff --git a/share b/share index 14338bef3f..6fe1530b60 160000 --- a/share +++ b/share @@ -1 +1 @@ -Subproject commit 14338bef3fa604d49160e376257264db1d3313e5 +Subproject commit 6fe1530b604fa729a5d363c08272714f95ea6ea1 From f18e4b034426767869012124f3166c37e1c4996f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 11 Jul 2025 14:32:08 -0600 Subject: [PATCH 06/37] Update proc_status_vm to use shr_sys_abort, and iulog from shr_log, and use newunit rather than a hardcoded file unit to make it more robust, it wasn't showing any output before 
--- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 15578e5da3..18833ee4c2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -103,7 +103,7 @@ path = share url = https://github.com/ekluzek/CESM_share #fxtag = share1.1.9 #fxtag = add_jdennis_procstatus_module -fxtag = 6fe1530b604fa729a5d363c08272714f95ea6ea1 +fxtag = 3cc3770a92b223556fc210e8f686994b3e4241be fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod test" to ensure personal forks aren't committed fxDONOTUSEurl = https://github.com/ESCOMP/CESM_share From 71de4c9165761e9ffecaa0831526f76343352dac Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 11 Jul 2025 14:33:32 -0600 Subject: [PATCH 07/37] Turn off restarts and history and add some timer options as well as turning off ncdio_pio testing for the purposes of decompInit work --- .../testmods_dirs/clm/run_self_tests/shell_commands | 9 +++++++++ .../testmods_dirs/clm/run_self_tests/user_nl_clm | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index d1efa2a23c..ca2a393757 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -12,5 +12,14 @@ ./xmlchange CLM_CONFIGURATION="nwp" # Restarts aren't allowed for these tests, and turn off CPL history +# First change in env_test.xml, then in the standard one so it won't complain there +./xmlchange --force REST_OPTION="never" --file env_test.xml +./xmlchange --force HIST_OPTION="never" --file env_test.xml ./xmlchange REST_OPTION="never" ./xmlchange HIST_OPTION="never" + +# Timer settings +./xmlchange TIMER_DETAIL="2" +./xmlchange SAVE_TIMING="TRUE" +./xmlchange CHECK_TIMING="TRUE" +./xmlchange ESMF_PROFILING_LEVEL="10" diff --git 
a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 3edb8c7fc7..4430f92e10 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1,4 +1,4 @@ -for_testing_run_ncdiopio_tests = .true. +for_testing_run_ncdiopio_tests = .false. for_testing_run_decomp_init_tests = .true. for_testing_exit_after_self_tests = .true. From efd21296c22974fb1c15e58d806596ed853c416a Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 14 Jul 2025 15:43:08 -0600 Subject: [PATCH 08/37] Fix proc_status_vm from the changes I made, it's now reporting properly --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 18833ee4c2..7615eec0bb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -103,7 +103,7 @@ path = share url = https://github.com/ekluzek/CESM_share #fxtag = share1.1.9 #fxtag = add_jdennis_procstatus_module -fxtag = 3cc3770a92b223556fc210e8f686994b3e4241be +fxtag = 9973692556da54f9562935be43c1d43b0607d24b fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod test" to ensure personal forks aren't committed fxDONOTUSEurl = https://github.com/ESCOMP/CESM_share From d9e212bfc85edd49abeb7715f8e252d8533e9a4f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 14 Jul 2025 15:43:26 -0600 Subject: [PATCH 09/37] Fix proc_status_vm from the changes I made, it's now reporting properly --- share | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share b/share index 6fe1530b60..9973692556 160000 --- a/share +++ b/share @@ -1 +1 @@ -Subproject commit 6fe1530b604fa729a5d363c08272714f95ea6ea1 +Subproject commit 9973692556da54f9562935be43c1d43b0607d24b From 213ff9c9cff62849eff1b85b064db1192aad91cb Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 14 Jul 2025 15:44:38 -0600 Subject: [PATCH 10/37] Add calls for evaluating 
memory --- src/main/decompInitMod.F90 | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index bebcd9d358..930f19c07c 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -12,12 +12,14 @@ module decompInitMod use spmdMod , only : masterproc, iam, npes, mpicom use abortutils , only : endrun use clm_varctl , only : iulog + use proc_status_vm, only : prt_vm_status + use shr_mem_mod , only : shr_mem_init, shr_mem_getusage ! implicit none private ! ! !PUBLIC TYPES: - ! + ! ! !PUBLIC MEMBER FUNCTIONS: public :: decompInit_lnd ! initializes lnd grid decomposition into clumps and processors public :: decompInit_clumps ! initializes atm grid decomposition into clumps @@ -34,6 +36,7 @@ module decompInitMod integer, parameter :: dbug=0 ! 0 = min, 1=normal, 2=much, 3=max character(len=*), parameter :: sourcefile = & __FILE__ + real(r8) :: msize, mrss ! memory usage variables #include ! mpi library include file !------------------------------------------------------------------------------ @@ -72,8 +75,17 @@ subroutine decompInit_lnd(lni, lnj, amask) integer, pointer :: clumpcnt(:) ! clump index counter integer, allocatable :: gdc2glo(:)! used to create gindex_global type(bounds_type) :: bounds ! contains subgrid bounds data + real(r8) :: msize, mrss !------------------------------------------------------------------------------ + call shr_mem_init(prt=.true.) ! initialize memory tracking + if(masterproc) then + call prt_vm_status('CTSM: decompInit_lnd: before') + endif + call shr_mem_getusage( msize, mrss, prt=.true.)
+ write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + lns = lni * lnj !--- set and verify nclumps --- @@ -291,6 +303,13 @@ subroutine decompInit_lnd(lni, lnj, amask) gindex_global(n-procinfo%begg+1) = gdc2glo(n) enddo + if(masterproc) then + call prt_vm_status('CTSM: decompInit_lnd: afterwards before deallocate') + endif + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + deallocate(clumpcnt) deallocate(gdc2glo) @@ -349,6 +368,9 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) character(len=32), parameter :: subname = 'decompInit_clumps' !------------------------------------------------------------------------------ + if(masterproc) then + call prt_vm_status('CTSM: decompInit_clumps: before') + endif !--- assign gridcells to clumps (and thus pes) --- call get_proc_bounds(bounds) begg = bounds%begg; endg = bounds%endg @@ -471,6 +493,10 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) endif enddo + if(masterproc) then + call prt_vm_status('CTSM: decompInit_clumps: after before deallocate') + endif + deallocate(allvecg,allvecl) deallocate(lcid) From 0abc15c327ab249a9e84137701b2f8c531245267 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 14 Jul 2025 18:20:21 -0600 Subject: [PATCH 11/37] Put memory stuff only under masterproc to only report on a single task to make it more obvious what is going on because each task should be similar --- src/main/decompInitMod.F90 | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 930f19c07c..187d074d63 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -78,13 +78,13 @@ subroutine decompInit_lnd(lni, lnj, amask) real(r8) :: msize, mrss !------------------------------------------------------------------------------ - call shr_mem_init(prt=.true.) ! 
initialize memory tracking if(masterproc) then + call shr_mem_init(prt=.true.) ! initialize memory tracking call prt_vm_status('CTSM: decompInit_lnd: before') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) endif - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) lns = lni * lnj @@ -305,10 +305,10 @@ subroutine decompInit_lnd(lni, lnj, amask) if(masterproc) then call prt_vm_status('CTSM: decompInit_lnd: afterwards before deallocate') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) endif - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) deallocate(clumpcnt) deallocate(gdc2glo) From c1bfd8366ffffa58ac2ef5538e8d4067fae7129f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 29 Jul 2025 13:25:52 -0600 Subject: [PATCH 12/37] Add a PE layout for mpasa3p75 --- cime_config/config_pes.xml | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/cime_config/config_pes.xml b/cime_config/config_pes.xml index bb10b8019c..d0794339db 100644 --- a/cime_config/config_pes.xml +++ b/cime_config/config_pes.xml @@ -2092,6 +2092,44 @@ + + + + + none + + -1 + -80 + -80 + -80 + -80 + -80 + -80 + -80 + -80 + + + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + + + 0 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + + + From 3a325198192b28fe7198256b56061302d833297f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 29 Jul 2025 13:27:56 -0600 Subject: [PATCH 13/37] Start adding timers to lnd_set_decomp_and_domain_from_readmesh --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 0b066ceb5b..9a2695f2f7 100644 ---
a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -20,6 +20,7 @@ module lnd_set_decomp_and_domain use spmdMod , only : masterproc, mpicom use clm_varctl , only : iulog, inst_suffix, FL => fname_len use abortutils , only : endrun + use perf_mod , only : t_startf, t_stopf implicit none private ! except @@ -106,6 +107,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes call lnd_get_global_dims(ni, nj, gsize, isgrid2d) ! Read in the land mesh from the file + call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh') mesh_lndinput = ESMF_MeshCreate(filename=trim(meshfile_lnd), fileformat=ESMF_FILEFORMAT_ESMFMESH, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -142,6 +144,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes else call shr_sys_abort('driver '//trim(driver)//' is not supported, must be lilac or cmeps') end if + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh') ! Determine lnd decomposition that will be used by ctsm from lndmask_glob call decompInit_lnd(lni=ni, lnj=nj, amask=lndmask_glob) @@ -190,6 +193,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes end do ! Generate a new mesh on the gindex decomposition + call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') distGrid_ctsm = ESMF_DistGridCreate(arbSeqIndexList=gindex_ctsm, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return mesh_ctsm = ESMF_MeshCreate(mesh_lndinput, elementDistGrid=distgrid_ctsm, rc=rc) @@ -198,6 +202,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! 
Set ldomain%lonc, ldomain%latc and ldomain%area call lnd_set_ldomain_gridinfo_from_mesh(mesh_ctsm, vm, gindex_ctsm, begg, endg, isgrid2d, ni, nj, ldomain, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') ! Set ldomain%lfrac ! Create fields on the input decomp and ctsm decomp @@ -206,6 +211,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! Redistribute field_lnd to field_ctsm ! Determine ldomain%frac using ctsm decomposition + call t_startf('lnd_set_decomp_and_domain_from_readmesh: land frac') if (trim(driver) == 'cmeps') then if (trim(meshfile_mask) /= trim(meshfile_lnd)) then @@ -245,6 +251,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes deallocate(lndfrac_glob) end if + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: land frac') ! Deallocate local pointer memory deallocate(gindex_lnd) From d531303d3aaa62013cdae6197df54c49454c4d26 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:41:32 -0600 Subject: [PATCH 14/37] Turn off RTM rather than increase the ROF coupling frequency --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index ca2a393757..143acc98e7 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -1,8 +1,8 @@ #!/bin/bash ./xmlchange CLM_FORCE_COLDSTART="on" -# We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to same frequency as DATM -./xmlchange ROF_NCPL='$ATM_NCPL' +# Turn off ROF model when used with compsets that have them +./xmlchange ROF_MODE='NULL' # Turn MEGAN off to run faster ./xmlchange 
CLM_BLDNML_OPTS='--no-megan' --append From 402584e05a4611926d0855ad63bef6e0d3cb1b01 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:42:24 -0600 Subject: [PATCH 15/37] Turn off urban HAC completely and minimize urban in gridcells --- .../testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 4430f92e10..499770f153 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -4,4 +4,6 @@ for_testing_exit_after_self_tests = .true. ! Turn off history, restarts, and output hist_empty_htapes = .true. -use_noio = .true. \ No newline at end of file +use_noio = .true. +urban_hac = 'OFF' +toosmall_urban = 98.0d00 ! Minimize urban in gridcells From 31ad84635d27dbb4900c544e7e5da8150bb7db76 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:47:35 -0600 Subject: [PATCH 16/37] Add a testmod for mpasa3p75 grid --- .../testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 cime_config/testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm diff --git a/cime_config/testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm new file mode 100644 index 0000000000..bfdcfb115b --- /dev/null +++ b/cime_config/testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm @@ -0,0 +1,6 @@ +! Settings currently required to run at the mpasa3p75 grid +! 
urbantv files at that resolution and use a redistribution mapping + +stream_fldfilename_urbantv = '/glade/derecho/scratch/bdobbins/ko/tbuildmax.nc' +stream_meshfile_urbantv = '/glade/derecho/scratch/bdobbins/ko/mesh.nc' +urbantvmapalgo = 'redist' From be81f3ad97f546e94a79cd31bae25bd858ef698b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:53:51 -0600 Subject: [PATCH 17/37] Add decomp initialization test and test list for ultra high resolution (3.75km mpasa) --- cime_config/testdefs/testlist_clm.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index 478dc59bdd..3f232e7518 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -14,6 +14,7 @@ matrixcn: Tests exercising the matrix-CN capability aux_clm_mpi_serial: aux_clm tests using mpi-serial. Useful for redoing tests that failed due to https://github.com/ESCOMP/CTSM/issues/2916, after having replaced libraries/mpi-serial with a fresh copy. decomp_init: Initialization tests specifically for examining the PE layout decomposition initialization + uhr_decomp_init: Initialization tests at Ultra High Resolution -- specifically for examining the PE layout decomposition initialization --> @@ -4209,6 +4210,15 @@ + + + + + + + + + From 95ab0147925f81eb66da2f5a041fd1bce7202a3f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:03:20 -0600 Subject: [PATCH 18/37] Fix syntax and correct 3p75 resolution grid name for test --- cime_config/testdefs/testlist_clm.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index 3f232e7518..a3dc4d760f 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -14,7 +14,7 @@ matrixcn: Tests exercising the matrix-CN capability aux_clm_mpi_serial: aux_clm tests using mpi-serial. 
Useful for redoing tests that failed due to https://github.com/ESCOMP/CTSM/issues/2916, after having replaced libraries/mpi-serial with a fresh copy. decomp_init: Initialization tests specifically for examining the PE layout decomposition initialization - uhr_decomp_init: Initialization tests at Ultra High Resolution -- specifically for examining the PE layout decomposition initialization + uhr_decomp_init: Initialization tests at Ultra High Resolution - specifically for examining the PE layout decomposition initialization --> @@ -4210,7 +4210,7 @@ - + From 6ee90776a4581764afa3bd8f282ce186f4b87e69 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:29:15 -0600 Subject: [PATCH 19/37] Still need to set NCPL_ROF --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index 143acc98e7..2cb24928b5 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -1,6 +1,9 @@ #!/bin/bash ./xmlchange CLM_FORCE_COLDSTART="on" +# We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to same frequency as DATM +./xmlchange ROF_NCPL='$ATM_NCPL + # Turn off ROF model when used with compsets that have them ./xmlchange ROF_MODE='NULL' From f03a0ebf9eccdf9b2038d8c247df97870bf5a6f0 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:29:50 -0600 Subject: [PATCH 20/37] Fix name of mpasa3p75 testmod in test --- cime_config/testdefs/testlist_clm.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index a3dc4d760f..1a53e25ae9 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -4210,7 +4210,7 
@@ - + From ed4c49e84f5815dc3d007aca1f869c954ce176bb Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:54:10 -0600 Subject: [PATCH 21/37] Fix syntax error --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index 2cb24928b5..f3144c219d 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -2,7 +2,7 @@ ./xmlchange CLM_FORCE_COLDSTART="on" # We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to same frequency as DATM -./xmlchange ROF_NCPL='$ATM_NCPL +./xmlchange ROF_NCPL='$ATM_NCPL' # Turn off ROF model when used with compsets that have them ./xmlchange ROF_MODE='NULL' From b5ab98c4d0e225716fb158fccf516f6f6a2dee88 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 16:15:27 -0600 Subject: [PATCH 22/37] Remove the mpasa15 test from expected fails --- cime_config/testdefs/ExpectedTestFails.xml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml index ac35ad812e..51f85ba8f3 100644 --- a/cime_config/testdefs/ExpectedTestFails.xml +++ b/cime_config/testdefs/ExpectedTestFails.xml @@ -363,11 +363,4 @@ - - - FAIL - #3316 - - - From 8914b12abfe465c97e5a3f7afe23fb72041c6a6d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 14:08:14 -0600 Subject: [PATCH 23/37] Add timers for clm_initialize2 that cover the whole subroutine --- src/main/clm_initializeMod.F90 | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index da8185be31..0138fba686 100644 --- a/src/main/clm_initializeMod.F90 +++ 
b/src/main/clm_initializeMod.F90 @@ -225,6 +225,7 @@ subroutine initialize2(ni,nj, currtime) call t_startf('clm_init2') + call t_startf('clm_init2_part1') ! Get processor bounds for gridcells call get_proc_bounds(bounds_proc) begg = bounds_proc%begg; endg = bounds_proc%endg @@ -277,12 +278,14 @@ subroutine initialize2(ni,nj, currtime) call CLMFatesGlobals2() end if + call t_stopf('clm_init2_part1') ! Determine decomposition of subgrid scale landunits, columns, patches call t_startf('clm_decompInit_clumps') call decompInit_clumps(ni, nj, glc_behavior) call t_stopf('clm_decompInit_clumps') + call t_startf('clm_init2_subgrid') ! *** Get ALL processor bounds - for gridcells, landunit, columns and patches *** call get_proc_bounds(bounds_proc) @@ -304,12 +307,14 @@ subroutine initialize2(ni,nj, currtime) call initGridCells(bounds_clump, glc_behavior) end do !$OMP END PARALLEL DO + call t_stopf('clm_init2_subgrid') ! Set global seg maps for gridcells, landlunits, columns and patches call t_startf('clm_decompInit_glcp') call decompInit_glcp(ni, nj, glc_behavior) call t_stopf('clm_decompInit_glcp') + call t_startf('clm_init2_part2') if (use_hillslope) then ! Initialize hillslope properties call InitHillslope(bounds_proc, hillslope_file) @@ -369,15 +374,15 @@ subroutine initialize2(ni,nj, currtime) if (use_fates) call CLMFatesTimesteps() ! Initialize daylength from the previous time step (needed so prev_dayl can be set correctly) - call t_startf('init_orbd') calday = get_curr_calday(reuse_day_365_for_day_366=.true.) call shr_orb_decl( calday, eccen, mvelpp, lambm0, obliqr, declin, eccf ) dtime = get_step_size_real() caldaym1 = get_curr_calday(offset=-int(dtime), reuse_day_365_for_day_366=.true.) call shr_orb_decl( caldaym1, eccen, mvelpp, lambm0, obliqr, declinm1, eccf ) - call t_stopf('init_orbd') call InitDaylength(bounds_proc, declin=declin, declinm1=declinm1, obliquity=obliqr) + call t_stopf('clm_init2_part2') + call t_startf('clm_init2_part3') ! 
Initialize Balance checking (after time-manager) call BalanceCheckInit() @@ -424,13 +429,13 @@ subroutine initialize2(ni,nj, currtime) ! Print history field info to standard out call hist_printflds() + call t_stopf('clm_init2_part3') + call t_startf('clm_init2_part4') ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV ! and/or dynamic landunits); note that these will be overwritten in a restart run - call t_startf('init_dyn_subgrid') call init_subgrid_weights_mod(bounds_proc) call dynSubgrid_init(bounds_proc, glc_behavior, crop_inst) - call t_stopf('init_dyn_subgrid') ! Initialize fates LUH2 usage if (use_fates_luh) then @@ -546,10 +551,12 @@ subroutine initialize2(ni,nj, currtime) call restFile_read(bounds_proc, fnamer, glc_behavior, & reset_dynbal_baselines_lake_columns = reset_dynbal_baselines_lake_columns) end if + call t_stopf('clm_init2_part4') ! If appropriate, create interpolated initial conditions if (nsrest == nsrStartup .and. finidat_interp_source /= ' ') then + call t_startf('clm_init2_init_interp') ! Check that finidat is not cold start - abort if it is if (finidat /= ' ') then call endrun(msg='ERROR clm_initializeMod: '//& @@ -599,8 +606,10 @@ subroutine initialize2(ni,nj, currtime) close(iun) write(iulog,'(a)')' Successfully wrote finidat status file '//trim(locfn) end if + call t_stopf('clm_init2_init_interp') end if + call t_startf('clm_init2_part5') ! If requested, reset dynbal baselines ! This needs to happen after reading the restart file (including after reading the ! interpolated restart file, if applicable). @@ -773,7 +782,6 @@ subroutine initialize2(ni,nj, currtime) deallocate(topo_glc_mec, fert_cft, irrig_method) ! 
Write log output for end of initialization - call t_startf('init_wlog') if (masterproc) then write(iulog,*) 'Successfully initialized the land model' if (nsrest == nsrStartup) then @@ -788,15 +796,17 @@ subroutine initialize2(ni,nj, currtime) write(iulog,'(72a1)') ("*",i=1,60) write(iulog,*) endif - call t_stopf('init_wlog') + call t_stopf('clm_init2_part5') if (water_inst%DoConsistencyCheck()) then + call t_startf('tracer_consistency_check') !$OMP PARALLEL DO PRIVATE (nc, bounds_clump) do nc = 1,nclumps call get_clump_bounds(nc, bounds_clump) call water_inst%TracerConsistencyCheck(bounds_clump, 'end of initialization') end do !$OMP END PARALLEL DO + call t_stopf('tracer_consistency_check') end if call t_stopf('clm_init2') From f294b31b68147422b7d3c891df5b59e1e5d6bd70 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 15:03:28 -0600 Subject: [PATCH 24/37] Add another timer within part3, and also turn off some of the history stuff in it when use_noio is TRUE --- src/main/clm_initializeMod.F90 | 43 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 0138fba686..e12b528fef 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -144,6 +144,7 @@ subroutine initialize2(ni,nj, currtime) use clm_varctl , only : use_hillslope use clm_varorb , only : eccen, mvelpp, lambm0, obliqr use clm_varctl , only : use_cropcal_streams + use clm_varctl , only : use_noio use landunit_varcon , only : landunit_varcon_init, max_lunit, numurbl use pftconMod , only : pftcon use decompInitMod , only : decompInit_clumps, decompInit_glcp @@ -382,12 +383,14 @@ subroutine initialize2(ni,nj, currtime) call InitDaylength(bounds_proc, declin=declin, declinm1=declinm1, obliquity=obliqr) call t_stopf('clm_init2_part2') - call t_startf('clm_init2_part3') ! 
Initialize Balance checking (after time-manager) + call t_startf('balance_check_init') call BalanceCheckInit() + call t_stopf('balance_check_init') + call t_startf('clm_init2_part3') ! History file variables - if (use_cn) then + if (use_cn .and. .not. use_noio ) then call hist_addfld1d (fname='DAYL', units='s', & avgflag='A', long_name='daylength', & ptr_gcell=grc%dayl, default='inactive') @@ -403,21 +406,23 @@ subroutine initialize2(ni,nj, currtime) ! First put in history calls for subgrid data structures - these cannot appear in the ! module for the subgrid data definition due to circular dependencies that are introduced - data2dptr => col%dz(:,-nlevsno+1:0) - col%dz(bounds_proc%begc:bounds_proc%endc,:) = spval - call hist_addfld2d (fname='SNO_Z', units='m', type2d='levsno', & - avgflag='A', long_name='Snow layer thicknesses', & - ptr_col=data2dptr, no_snow_behavior=no_snow_normal, default='inactive') - - call hist_addfld2d (fname='SNO_Z_ICE', units='m', type2d='levsno', & - avgflag='A', long_name='Snow layer thicknesses (ice landunits only)', & - ptr_col=data2dptr, no_snow_behavior=no_snow_normal, & - l2g_scale_type='ice', default='inactive') - - col%zii(bounds_proc%begc:bounds_proc%endc) = spval - call hist_addfld1d (fname='ZII', units='m', & - avgflag='A', long_name='convective boundary height', & - ptr_col=col%zii, default='inactive') + if ( .not. 
use_noio )then + data2dptr => col%dz(:,-nlevsno+1:0) + col%dz(bounds_proc%begc:bounds_proc%endc,:) = spval + call hist_addfld2d (fname='SNO_Z', units='m', type2d='levsno', & + avgflag='A', long_name='Snow layer thicknesses', & + ptr_col=data2dptr, no_snow_behavior=no_snow_normal, default='inactive') + + call hist_addfld2d (fname='SNO_Z_ICE', units='m', type2d='levsno', & + avgflag='A', long_name='Snow layer thicknesses (ice landunits only)', & + ptr_col=data2dptr, no_snow_behavior=no_snow_normal, & + l2g_scale_type='ice', default='inactive') + + col%zii(bounds_proc%begc:bounds_proc%endc) = spval + call hist_addfld1d (fname='ZII', units='m', & + avgflag='A', long_name='convective boundary height', & + ptr_col=col%zii, default='inactive') + end if ! Initialize instances of all derived types as well as time constant variables call clm_instInit(bounds_proc) @@ -428,7 +433,9 @@ subroutine initialize2(ni,nj, currtime) call SnowAge_init( ) ! SNICAR aging parameters: ! Print history field info to standard out - call hist_printflds() + if ( .not. use_noio )then + call hist_printflds() + end if call t_stopf('clm_init2_part3') call t_startf('clm_init2_part4') From 1bd240844aa037d2012a089a331f194d4872dd30 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 15:20:33 -0600 Subject: [PATCH 25/37] Balance check doesn't take time, so adjust the timers again for part3 --- src/main/clm_initializeMod.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index e12b528fef..6647af0dda 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -383,12 +383,10 @@ subroutine initialize2(ni,nj, currtime) call InitDaylength(bounds_proc, declin=declin, declinm1=declinm1, obliquity=obliqr) call t_stopf('clm_init2_part2') + call t_startf('clm_init2_part3') ! 
Initialize Balance checking (after time-manager) - call t_startf('balance_check_init') call BalanceCheckInit() - call t_stopf('balance_check_init') - call t_startf('clm_init2_part3') ! History file variables if (use_cn .and. .not. use_noio ) then call hist_addfld1d (fname='DAYL', units='s', & @@ -426,7 +424,9 @@ subroutine initialize2(ni,nj, currtime) ! Initialize instances of all derived types as well as time constant variables call clm_instInit(bounds_proc) + call t_stopf('clm_init2_part3') + call t_startf('clm_init2_snow_soil_init') call CNParamsSetSoilDepth() ! Initialize SNICAR optical and aging parameters call SnowOptics_init( ) ! SNICAR optical parameters: @@ -436,7 +436,7 @@ subroutine initialize2(ni,nj, currtime) if ( .not. use_noio )then call hist_printflds() end if - call t_stopf('clm_init2_part3') + call t_stopf('clm_init2_snow_soil_init') call t_startf('clm_init2_part4') ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV From 60bd85e71b379bc4108b2164e4a3953439468080 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 23:04:12 -0600 Subject: [PATCH 26/37] Add memory checking calls through the lnd_set_decomp_and_domain_from_readmesh subroutine --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 9a2695f2f7..2e5485f2af 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -56,6 +56,9 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes use decompMod , only : gindex_global, bounds_type, get_proc_bounds use clm_varpar , only : nlevsoi use clm_varctl , only : use_soil_moisture_streams + use proc_status_vm, only : prt_vm_status + use shr_mem_mod , only : shr_mem_getusage + use shr_sys_mod , only : shr_sys_flush ! 
input/output variables character(len=*) , intent(in) :: driver ! cmeps or lilac @@ -86,6 +89,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes real(r8) , pointer :: lndfrac_glob(:) real(r8) , pointer :: lndfrac_loc_input(:) real(r8) , pointer :: dataptr1d(:) + real(r8) :: msize, mrss !------------------------------------------------------------------------------- rc = ESMF_SUCCESS @@ -103,6 +107,13 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes write(iulog,*) end if + if(masterproc) then + call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: before allocate') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + endif + ! Determine global 2d sizes from read of dimensions of surface dataset and allocate global memory call lnd_get_global_dims(ni, nj, gsize, isgrid2d) @@ -191,6 +202,12 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes gindex_ctsm(n) = gindex_ocn(n-nlnd) end if end do + if(masterproc) then + call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after allocates') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + endif ! Generate a new mesh on the gindex decomposition call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') @@ -252,12 +269,25 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes end if call t_stopf('lnd_set_decomp_and_domain_from_readmesh: land frac') + if(masterproc) then + call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: just before deallocates') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + endif ! 
Deallocate local pointer memory deallocate(gindex_lnd) deallocate(gindex_ocn) deallocate(gindex_ctsm) + if(masterproc) then + call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after deallocates') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + endif + end subroutine lnd_set_decomp_and_domain_from_readmesh !=============================================================================== From 8c5debbb251b3cd840619064d556730a5c445b0a Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 00:24:04 -0600 Subject: [PATCH 27/37] Remove one of the memory checks as it wasn't needed --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 2e5485f2af..caaa9ca8d3 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -202,12 +202,6 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes gindex_ctsm(n) = gindex_ocn(n-nlnd) end if end do - if(masterproc) then - call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after allocates') - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) - endif ! 
Generate a new mesh on the gindex decomposition call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') From 3f5cff59b34aca05dc222a94f77968f04d8ef601 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 10:40:51 -0600 Subject: [PATCH 28/37] Add some timers for clmInstInit --- src/main/clm_instMod.F90 | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/main/clm_instMod.F90 b/src/main/clm_instMod.F90 index 7d9a0f6ad2..b284836159 100644 --- a/src/main/clm_instMod.F90 +++ b/src/main/clm_instMod.F90 @@ -229,6 +229,7 @@ subroutine clm_instInit(bounds) integer :: dummy_to_make_pgi_happy !---------------------------------------------------------------------- + call t_startf('clm_instInit_part1') ! Note: h2osno_col and snow_depth_col are initialized as local variables ! since they are needed to initialize vertical data structures @@ -286,6 +287,9 @@ subroutine clm_instInit(bounds) call setSoilLayerClass(bounds) endif + call t_stopf('clm_instInit_part1') + + call t_startf('clm_instInit_part2') !----------------------------------------------- ! Set cold-start values for snow levels, snow layers and snow interfaces !----------------------------------------------- @@ -338,6 +342,10 @@ subroutine clm_instInit(bounds) call glacier_smb_inst%Init(bounds) + call t_stopf('clm_instInit_part2') + + call t_startf('clm_instInit_part3') + ! COMPILER_BUG(wjs, 2014-11-29, pgi 14.7) Without the following assignment, the ! assertion in energyflux_inst%Init fails with pgi 14.7 on yellowstone, presumably due ! to a compiler bug. 
@@ -370,6 +378,10 @@ subroutine clm_instInit(bounds) call surfrad_inst%Init(bounds) + call t_stopf('clm_instInit_part3') + + call t_startf('clm_instInit_part4') + allocate(dust_emis_inst, source = create_dust_emissions(bounds, NLFilename)) allocate(scf_method, source = CreateAndInitSnowCoverFraction( & @@ -401,6 +413,10 @@ subroutine clm_instInit(bounds) call drydepvel_inst%Init(bounds) + call t_stopf('clm_instInit_part4') + + call t_startf('clm_instInit_part5') + if_decomp: if (decomp_method /= no_soil_decomp) then ! Initialize soilbiogeochem_state_inst @@ -473,6 +489,7 @@ subroutine clm_instInit(bounds) deallocate (h2osno_col) deallocate (snow_depth_col) deallocate (exice_init_conc_col) + call t_stopf('clm_instInit_part5') ! ------------------------------------------------------------------------ ! Initialize accumulated fields From b30d9e03ed5382e0dd30ad81e6d9db3b039139cf Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 11:18:23 -0600 Subject: [PATCH 29/37] Combine timers for part3/4/5 as they are all small --- src/main/clm_instMod.F90 | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/main/clm_instMod.F90 b/src/main/clm_instMod.F90 index b284836159..bc86749d06 100644 --- a/src/main/clm_instMod.F90 +++ b/src/main/clm_instMod.F90 @@ -378,10 +378,6 @@ subroutine clm_instInit(bounds) call surfrad_inst%Init(bounds) - call t_stopf('clm_instInit_part3') - - call t_startf('clm_instInit_part4') - allocate(dust_emis_inst, source = create_dust_emissions(bounds, NLFilename)) allocate(scf_method, source = CreateAndInitSnowCoverFraction( & @@ -413,10 +409,6 @@ subroutine clm_instInit(bounds) call drydepvel_inst%Init(bounds) - call t_stopf('clm_instInit_part4') - - call t_startf('clm_instInit_part5') - if_decomp: if (decomp_method /= no_soil_decomp) then ! 
Initialize soilbiogeochem_state_inst @@ -489,7 +481,7 @@ subroutine clm_instInit(bounds) deallocate (h2osno_col) deallocate (snow_depth_col) deallocate (exice_init_conc_col) - call t_stopf('clm_instInit_part5') + call t_stopf('clm_instInit_part3') ! ------------------------------------------------------------------------ ! Initialize accumulated fields From 373b84ce078d722140bbc5f58c5ab5b4e146e00e Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 11:19:17 -0600 Subject: [PATCH 30/37] Add timers for urbantv Init and InitVertical --- src/cpl/share_esmf/UrbanTimeVarType.F90 | 4 ++++ src/main/initVerticalMod.F90 | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/cpl/share_esmf/UrbanTimeVarType.F90 b/src/cpl/share_esmf/UrbanTimeVarType.F90 index 1e6d004e96..cb30cfe02c 100644 --- a/src/cpl/share_esmf/UrbanTimeVarType.F90 +++ b/src/cpl/share_esmf/UrbanTimeVarType.F90 @@ -16,6 +16,7 @@ module UrbanTimeVarType use clm_varcon , only : spval use LandunitType , only : lun use GridcellType , only : grc + use perf_mod , only : t_startf, t_stopf ! implicit none private @@ -143,6 +144,8 @@ subroutine urbantv_init(this, bounds, NLFilename) stream_meshfile_urbantv, & urbantv_tintalgo + call t_startf("urbantv_init") + ! Default values for namelist stream_year_first_urbantv = 1 ! first year in stream to use stream_year_last_urbantv = 1 ! 
last year in stream to use @@ -219,6 +222,7 @@ subroutine urbantv_init(this, bounds, NLFilename) if (ESMF_LogFoundError(rcToCheck=rc, msg=ESMF_LOGERR_PASSTHRU, line=__LINE__, file=__FILE__)) then call ESMF_Finalize(endflag=ESMF_END_ABORT) end if + call t_stopf("urbantv_init") end subroutine urbantv_init diff --git a/src/main/initVerticalMod.F90 b/src/main/initVerticalMod.F90 index 64383e7a7c..4a1177666e 100644 --- a/src/main/initVerticalMod.F90 +++ b/src/main/initVerticalMod.F90 @@ -29,6 +29,7 @@ module initVerticalMod use ColumnType , only : col use glcBehaviorMod , only : glc_behavior_type use abortUtils , only : endrun + use perf_mod , only : t_startf, t_stopf use ncdio_pio ! ! !PUBLIC TYPES: @@ -189,6 +190,7 @@ subroutine initVertical(bounds, glc_behavior, thick_wall, thick_roof) integer :: jmin_bedrock character(len=*), parameter :: subname = 'initVertical' !------------------------------------------------------------------------ + call t_startf('initVertical') begc = bounds%begc; endc= bounds%endc begl = bounds%begl; endl= bounds%endl @@ -669,6 +671,8 @@ subroutine initVertical(bounds, glc_behavior, thick_wall, thick_roof) call ncd_pio_closefile(ncid) + call t_stopf('initVertical') + end subroutine initVertical !----------------------------------------------------------------------- From 4f7de29eb23fd0abcc6c5d970c15e80905362887 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 12:13:44 -0600 Subject: [PATCH 31/37] Add a timer around just the strdata_init --- src/cpl/share_esmf/UrbanTimeVarType.F90 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cpl/share_esmf/UrbanTimeVarType.F90 b/src/cpl/share_esmf/UrbanTimeVarType.F90 index cb30cfe02c..2870ae9b5c 100644 --- a/src/cpl/share_esmf/UrbanTimeVarType.F90 +++ b/src/cpl/share_esmf/UrbanTimeVarType.F90 @@ -198,6 +198,7 @@ subroutine urbantv_init(this, bounds, NLFilename) endif ! 
Initialize the cdeps data type this%sdat_urbantv + call t_startf("str_data_init") call shr_strdata_init_from_inline(this%sdat_urbantv, & my_task = iam, & logunit = iulog, & @@ -222,6 +223,8 @@ subroutine urbantv_init(this, bounds, NLFilename) if (ESMF_LogFoundError(rcToCheck=rc, msg=ESMF_LOGERR_PASSTHRU, line=__LINE__, file=__FILE__)) then call ESMF_Finalize(endflag=ESMF_END_ABORT) end if + call t_stopf("str_data_init") + call t_stopf("urbantv_init") end subroutine urbantv_init From 7f03d77dba888624d79b16085a417d1fdd37c052 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 12:31:18 -0600 Subject: [PATCH 32/37] Make an internal subroutine for deallocation inside of lnd_set_decomp_and_domain_from_readmesh --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index caaa9ca8d3..1de6ecddde 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -91,7 +91,6 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes real(r8) , pointer :: dataptr1d(:) real(r8) :: msize, mrss !------------------------------------------------------------------------------- - rc = ESMF_SUCCESS ! Write diag info @@ -271,9 +270,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes endif ! Deallocate local pointer memory - deallocate(gindex_lnd) - deallocate(gindex_ocn) - deallocate(gindex_ctsm) + call from_readmesh_dealloc() if(masterproc) then call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after deallocates') @@ -282,6 +279,20 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes call shr_sys_flush(iulog) endif + + !=============================================================================== + ! 
Internal subroutines for this subroutine + contains + !=============================================================================== + + subroutine from_readmesh_dealloc() + deallocate(gindex_lnd) + deallocate(gindex_ocn) + deallocate(gindex_ctsm) + end subroutine from_readmesh_dealloc + + !------------------------------------------------------------------------------- + end subroutine lnd_set_decomp_and_domain_from_readmesh !=============================================================================== From 02f894e4d4c8f9328318fe4fea49ef5c3a6a3cf1 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 12:46:34 -0600 Subject: [PATCH 33/37] Add release of the ESMF objects in the lnd_set_decomp_and_domain_from_readmesh subroutine --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 1de6ecddde..4e3714e125 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -269,8 +269,9 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes call shr_sys_flush(iulog) endif - ! Deallocate local pointer memory - call from_readmesh_dealloc() + ! Deallocate local pointer memory including ESMF objects + call from_readmesh_dealloc( rc ) + if (chkerr(rc,__LINE__,u_FILE_u)) return if(masterproc) then call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after deallocates') @@ -285,10 +286,29 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes contains !=============================================================================== - subroutine from_readmesh_dealloc() + subroutine from_readmesh_dealloc( rc ) + use ESMF, only : ESMF_FieldRedistRelease, ESMF_DistGridDestroy, ESMF_FieldDestroy, ESMF_MeshDestroy + integer, intent(out) :: rc ! 
ESMF return code to indicate deallocate was successful + + rc = ESMF_SUCCESS + deallocate(gindex_lnd) deallocate(gindex_ocn) deallocate(gindex_ctsm) + ! Destroy or release all of the ESMF objects + call ESMF_FieldRedistRelease( rhandle_lnd2ctsm, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_DistGridDestroy( distgrid_ctsm, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_FieldDestroy( field_lnd, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_FieldDestroy( field_ctsm, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_maskinput, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_lndinput, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + end subroutine from_readmesh_dealloc !------------------------------------------------------------------------------- From 57b04cdbc5b6ec54266a87af2c1c80bf14c92542 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 13:08:24 -0600 Subject: [PATCH 34/37] ESMF tells me that some of these objects are used later and can not be destroyed so remove the destroy for the distgrid, and the two meshes, this runs but doesn't seem to lower memory --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 4e3714e125..bb57b16ee6 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -290,6 +290,8 @@ subroutine from_readmesh_dealloc( rc ) use ESMF, only : ESMF_FieldRedistRelease, ESMF_DistGridDestroy, ESMF_FieldDestroy, ESMF_MeshDestroy integer, intent(out) :: rc ! ESMF return code to indicate deallocate was successful + logical :: no_esmf_garbage = .true. ! If .true. 
release all ESMF data (which can be problematic if referenced again) + rc = ESMF_SUCCESS deallocate(gindex_lnd) @@ -298,16 +300,16 @@ subroutine from_readmesh_dealloc( rc ) ! Destroy or release all of the ESMF objects call ESMF_FieldRedistRelease( rhandle_lnd2ctsm, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_DistGridDestroy( distgrid_ctsm, rc=rc) - if (chkerr(rc,__LINE__,u_FILE_u)) return + !call ESMF_DistGridDestroy( distgrid_ctsm, rc=rc) + !if (chkerr(rc,__LINE__,u_FILE_u)) return call ESMF_FieldDestroy( field_lnd, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return call ESMF_FieldDestroy( field_ctsm, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_MeshDestroy( mesh_maskinput, rc=rc) - if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_MeshDestroy( mesh_lndinput, rc=rc) - if (chkerr(rc,__LINE__,u_FILE_u)) return + !call ESMF_MeshDestroy( mesh_maskinput, rc=rc) + !if (chkerr(rc,__LINE__,u_FILE_u)) return + !call ESMF_MeshDestroy( mesh_lndinput, rc=rc) + !if (chkerr(rc,__LINE__,u_FILE_u)) return end subroutine from_readmesh_dealloc From 8cf101afea39160a3b032d2d9352b2b108929914 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 13:52:08 -0600 Subject: [PATCH 35/37] Turn on removing all ESMF garbage for the things deleted, and add note about leaving the distgrid around, and also delete the meshes as it seems to work with this in place --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index bb57b16ee6..6cbdde43f6 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -72,7 +72,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! 
local variables type(ESMF_Mesh) :: mesh_maskinput type(ESMF_Mesh) :: mesh_lndinput - type(ESMF_DistGrid) :: distgrid_ctsm + type(ESMF_DistGrid) :: distgrid_ctsm ! This appears to be local but is used later in lnd_import_export type(ESMF_Field) :: field_lnd type(ESMF_Field) :: field_ctsm type(ESMF_RouteHandle) :: rhandle_lnd2ctsm @@ -203,6 +203,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes end do ! Generate a new mesh on the gindex decomposition + ! NOTE: The distgrid_ctsm will be used later in lnd_import_export, even though it appears to just be local call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') distGrid_ctsm = ESMF_DistGridCreate(arbSeqIndexList=gindex_ctsm, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -298,18 +299,22 @@ subroutine from_readmesh_dealloc( rc ) deallocate(gindex_ocn) deallocate(gindex_ctsm) ! Destroy or release all of the ESMF objects - call ESMF_FieldRedistRelease( rhandle_lnd2ctsm, rc=rc) + call ESMF_FieldRedistRelease( rhandle_lnd2ctsm, noGarbage=no_esmf_garbage, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return + !-------------------------------------------------------------------------- + ! NOTE: We can't destroy the distgrid -- because it will be used later + ! As such we don't do the following... 
EBK 08/01/2025 !call ESMF_DistGridDestroy( distgrid_ctsm, rc=rc) !if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_FieldDestroy( field_lnd, rc=rc) + !-------------------------------------------------------------------------- + call ESMF_FieldDestroy( field_lnd, noGarbage=no_esmf_garbage, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_FieldDestroy( field_ctsm, rc=rc) + call ESMF_FieldDestroy( field_ctsm, noGarbage=no_esmf_garbage, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_maskinput, noGarbage=no_esmf_garbage, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_lndinput, noGarbage=no_esmf_garbage, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return - !call ESMF_MeshDestroy( mesh_maskinput, rc=rc) - !if (chkerr(rc,__LINE__,u_FILE_u)) return - !call ESMF_MeshDestroy( mesh_lndinput, rc=rc) - !if (chkerr(rc,__LINE__,u_FILE_u)) return end subroutine from_readmesh_dealloc From ccdd13ceaa9a8ffa1d0d28a51468b7ebc079b7db Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 17:55:14 -0600 Subject: [PATCH 36/37] Fix XML name for RTM_MODE --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index f3144c219d..0d8a5d36e1 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -5,7 +5,7 @@ ./xmlchange ROF_NCPL='$ATM_NCPL' # Turn off ROF model when used with compsets that have them -./xmlchange ROF_MODE='NULL' +./xmlchange RTM_MODE='NULL' # Turn MEGAN off to run faster ./xmlchange CLM_BLDNML_OPTS='--no-megan' --append From 03722cdcb31aeb61c8e01f9f38cc22486472e50b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 4 Aug 2025 15:07:30 -0600 Subject: [PATCH 37/37] Call 
shr_malloc_trim so that memory is released by the OS after the dealloc happens, this shows the dealloc actually releasing memory now --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 6cbdde43f6..01580db90d 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -56,7 +56,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes use decompMod , only : gindex_global, bounds_type, get_proc_bounds use clm_varpar , only : nlevsoi use clm_varctl , only : use_soil_moisture_streams - use proc_status_vm, only : prt_vm_status + use proc_status_vm, only : prt_vm_status, shr_malloc_trim use shr_mem_mod , only : shr_mem_getusage use shr_sys_mod , only : shr_sys_flush @@ -273,6 +273,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! Deallocate local pointer memory including ESMF objects call from_readmesh_dealloc( rc ) if (chkerr(rc,__LINE__,u_FILE_u)) return + call shr_malloc_trim() if(masterproc) then call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after deallocates')