Skip to content

Commit e282594

Browse files
authored
Merge pull request #309 from LLNL/feature/performance_tests
CI Overhaul
2 parents 4e27343 + 1673bb2 commit e282594

32 files changed

+831
-1342
lines changed

.gitlab/machines.yml

+11-9
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,27 @@
44
.on_ruby:
55
tags:
66
- ruby
7-
- shell
7+
- batch
88
variables:
9+
SCHEDULER_ACTION: allocate
10+
SCHEDULER_PARAMETERS: "--res=ci --exclusive -N 2 -t 120"
11+
NPROC: 112
912
HOSTNAME: 'ruby'
10-
PARTITION: pdebug
11-
BUILD_ALLOC: srun -N 1 -c 36 -p pdebug -t 60
12-
TEST_ALLOC: ''
13-
CLEAN_ALLOC: srun -n 20
13+
timeout: 120 minutes
1414
extends: [.on_toss_4_x86]
1515

1616
.on_lassen:
1717
tags:
1818
- lassen
19-
- shell
19+
- batch
2020
variables:
21+
SCHEDULER_PARAMETERS: "-nnodes 1 -W 150 -q pci -alloc_flags atsdisable"
22+
LSB_JOB_STARTER: "ENVIRONMENT=BATCH /usr/tcetmp/bin/bsub_job_starter %USRCMD"
23+
NPROC: 40
24+
ENVIRONMENT: "BATCH"
2125
HOSTNAME: 'lassen'
22-
BUILD_ALLOC: lalloc 1 -W 60
23-
TEST_ALLOC: $BUILD_ALLOC
24-
CLEAN_ALLOC: lalloc 1 lrun -n 20
2526
LC_MODULES: "cuda/11.1.0"
27+
timeout: 150 minutes
2628
extends: [.on_blueos_3_ppc64]
2729

2830
# ------------------------------------------------------------------------------

.gitlab/scripts.yml

+12-10
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
- cd $CI_BUILD_DIR
1515
- echo $SPEC
1616

17-
- $BUILD_ALLOC ./$SCRIPT_DIR/gitlab/build_and_install.py --spec="$SPEC" --tpls-only
17+
- ./$SCRIPT_DIR/gitlab/build_and_install.py --spec="$SPEC" --tpls-only
1818
artifacts:
1919
paths:
2020
- ci-dir.txt
@@ -27,7 +27,7 @@
2727
script:
2828
- CI_BUILD_DIR=$(cat ci-dir.txt)
2929
- cd $CI_BUILD_DIR && cat job-name.txt
30-
- $BUILD_ALLOC ./$SCRIPT_DIR/devtools/host-config-build.py --host-config gitlab.cmake --build $EXTRA_CMAKE_ARGS
30+
- ./$SCRIPT_DIR/devtools/host-config-build.py --no-clean --build --nprocs $NPROC --host-config gitlab.cmake $EXTRA_CMAKE_ARGS
3131
artifacts:
3232
paths:
3333
- ci-dir.txt
@@ -55,7 +55,7 @@
5555
- CI_BUILD_DIR=$(cat ci-dir.txt)
5656
- cd $CI_BUILD_DIR && cat job-name.txt
5757

58-
- ./build_gitlab/install/spheral $SCRIPT_DIR/gitlab/run_ats.py --test-alloc "$TEST_ALLOC" --ats-file $ATS_FILE --ci-build-dir $CI_BUILD_DIR || exit_code=$?
58+
- ./build_gitlab/install/spheral-ats --ciRun ./build_gitlab/install/$ATS_FILE || exit_code=$?
5959
- cp -r test-logs $CI_PROJECT_DIR
6060
- exit $exit_code
6161
artifacts:
@@ -76,7 +76,7 @@
7676

7777
- ml load mpifileutils
7878
- cd $SPHERAL_BUILDS_DIR
79-
- $CLEAN_ALLOC drm $CI_BUILD_DIR/..
79+
- drm $CI_BUILD_DIR/..
8080

8181
# ------------------------------------------------------------------------------
8282
# Shared TPL scripts.
@@ -85,15 +85,15 @@
8585
.update_tpls:
8686
stage: update_tpls
8787
script:
88-
- $BUILD_ALLOC ./$SCRIPT_DIR/devtools/tpl-manager.py --spec-list="$SCRIPT_DIR/devtools/spec-list.json" --spheral-spack-dir=$UPSTREAM_DIR
88+
- ./$SCRIPT_DIR/devtools/tpl-manager.py --no-upstream --spec-list="$SCRIPT_DIR/devtools/spec-list.json" --spheral-spack-dir=$UPSTREAM_DIR
8989

9090
.toss_update_permissions:
9191
stage: update_permissions
9292
variables:
9393
GIT_STRATEGY: none
9494
script:
9595
- ml load mpifileutils
96-
- srun -N 1 -p $PARTITION -n 20 -t 10 dchmod --mode go+rx $UPSTREAM_DIR
96+
- dchmod --mode go+rx $UPSTREAM_DIR
9797

9898
# ------------------------------------------------------------------------------
9999
# Production Installation scripts
@@ -117,7 +117,7 @@
117117
- INSTALL_DIR=/usr/gapps/Spheral/$SYS_TYPE/spheral-$SPHERAL_REV_STR
118118
- DEV_PKG_NAME=$SYS_TYPE-spheral-dev-pkg-$SPHERAL_REV_STR
119119

120-
- env SPHERAL_REV_STR=$SPHERAL_REV_STR INSTALL_DIR=$INSTALL_DIR SPEC=$SPEC SPACK_PKG_NAME=$SPACK_PKG_NAME BUILD_ALLOC="$BUILD_ALLOC" SCRIPT_DIR=$SCRIPT_DIR
120+
- env SPHERAL_REV_STR=$SPHERAL_REV_STR INSTALL_DIR=$INSTALL_DIR SPEC=$SPEC SPACK_PKG_NAME=$SPACK_PKG_NAME SCRIPT_DIR=$SCRIPT_DIR
121121
bash ./$SCRIPT_DIR/lc/generate-buildcache.sh
122122

123123
- echo $INSTALL_DIR &> install-dir.txt
@@ -131,6 +131,8 @@
131131

132132
.install_dev_pkg:
133133
stage: install_production
134+
variables:
135+
GIT_STRATEGY: none
134136
script:
135137
- INSTALL_DIR=$(cat install-dir.txt)
136138
- DEV_PKG_NAME=$(cat dev-pkg-name.txt)
@@ -139,7 +141,7 @@
139141
- tar -xzf $DEV_PKG_NAME.tar.gz
140142
- cd $DEV_PKG_NAME
141143

142-
- env INSTALL_DIR=$INSTALL_DIR SPEC=$SPEC SPACK_PKG_NAME=$SPACK_PKG_NAME BUILD_ALLOC="$BUILD_ALLOC" SCRIPT_DIR=$SCRIPT_DIR
144+
- env INSTALL_DIR=$INSTALL_DIR SPEC=$SPEC SPACK_PKG_NAME=$SPACK_PKG_NAME BUILD_ALLOC="" SCRIPT_DIR=$SCRIPT_DIR
143145
bash ./$SCRIPT_DIR/lc/install-from-dev-pkg.sh
144146

145147
artifacts:
@@ -158,7 +160,7 @@
158160
- chmod go+r /usr/gapps/Spheral/modulefiles/Spheral/"$ALIAS".lua
159161

160162
- ml load mpifileutils
161-
- srun -N 1 -p $PARTITION -n 20 -t 10 dchmod --mode go+rx $INSTALL_DIR
163+
- dchmod --mode go+rx $INSTALL_DIR
162164
- ln -sfn $INSTALL_DIR /usr/gapps/Spheral/$SYS_TYPE/$ALIAS
163165

164166

@@ -181,7 +183,7 @@
181183
- echo $DIR_LIST
182184

183185
- ml load mpifileutils
184-
- if [[ $DIR_LIST ]]; then $CLEAN_ALLOC drm $DIR_LIST; else echo "No directories to remove at this time."; fi
186+
- if [[ $DIR_LIST ]]; then drm $DIR_LIST; else echo "No directories to remove at this time."; fi
185187
when: always
186188

187189
.merge_pr_rule:

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ include(cmake/SpheralVersion.cmake)
55
project(spheral LANGUAGES C CXX Fortran VERSION ${SPHERAL_VERSION})
66

77
set(SPHERAL_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Path to Spheral source directory")
8-
set(SPHERAL_TEST_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
8+
set(SPHERAL_TEST_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/tests)
99

1010
include(cmake/SetupSpheral.cmake)
1111

Dockerfile

+9-1
Original file line numberDiff line numberDiff line change
@@ -75,5 +75,13 @@ RUN make install
7575
# Run ATS testing suite.
7676
WORKDIR ../install
7777
ENV MPLBACKEND=agg
78-
RUN ./spheral-atstest --filter="level<100" tests/integration.ats
78+
79+
# ATS currently does not allow us to run in parallel for regular linux machines
80+
# If it did, we would need some of the following commands
81+
#RUN export OMP_NUM_THREADS=1
82+
#RUN export MACHINE_TYPE="winParallel"
83+
#RUN ./spheral-ats --level 99 --mpiexe mpiexec --npMax $JCXX tests/integration.ats
84+
85+
# Instead, we will just run it normally
86+
RUN ./spheral-ats --level 99 tests/integration.ats
7987
# -----------------------------------------------------------------------------

RELEASE_NOTES.md

+6-2
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,10 @@ Notable changes include:
2525
* Physics::postStateUpdate now returns a bool indicating if boundary conditions should be enforced again.
2626
* Physics packages can now have Physics sub-packages, which can be run before or after the main package. The SpheralController
2727
now checks for these packages and adds them to the physics package list as needed.
28-
* Physics packages can indicate if they require Voronoi cell information be available. If so, a new package which computes and
28+
* Physics packages can indicate if they require Voronoi cell information be available. If so, a new package which computes and
2929
updates the Voronoi information is automatically added to the package list by the SpheralController (similar to how the
3030
Reproducing Kernel corrections are handled).
31+
* Command line options are now consistent. Default values of a string "None" are no longer allowed and any input through the command line of "None" will become the python NoneType None.
3132
* Cleaned up use of std::any in State objects using a visitor pattern to be rigorous ensuring all state entries are handled properly
3233
during assignment, equality, and cloning operations. This is intended to help ensure our Physics advance during time integration
3334
is correct.
@@ -43,13 +44,16 @@ Notable changes include:
4344
* ENABLE\_DEV\_BUILD can now export targets properly.
4445
* Added a GCC flag to prevent building variable tracking symbols when building PYB11 modules. This is unnecessary, and
4546
on some platforms trying to build such symbols is very expensive and in some cases fails.
47+
* Consolidates lcatstest.in and run\_ats.py into a single spheral\_ats.py script.
48+
* SPHERAL\_TEST\_INSTALL\_PREFIX now includes the tests directory.
49+
* Removed most configured files and added a SpheralConfigs.py file to use at runtime instead.
4650
4751
* Bug Fixes / improvements:
4852
* Wrappers for MPI calls are simplified and improved.
4953
* Time step estimate due to velocity divergence in RZ space has been fixed.
5054
* Fixed tolerances for ANEOS equation of state temperature lookup
5155
* Clang C++ warnings have been eliminated, so the Clang CI tests have been updated to treat warnings as errors.
52-
* Fix for installing libraries when building individual package WITH ENABLE_DEV_BUILD=On.
56+
* Fix for installing libraries when building individual package with ENABLE\_DEV\_BUILD=On.
5357
* Bugfix for RZ solid CRKSPH with compatible energy.
5458
* Parsing of None string now always becomes None python type. Tests have been updated accordingly.
5559
* IO for checkpoints and visualization can now be properly turned off through SpheralController input options.

cmake/SetupSpheral.cmake

+8-33
Original file line numberDiff line numberDiff line change
@@ -153,40 +153,15 @@ endif()
153153
# Build C++ tests and install tests to install directory
154154
#-------------------------------------------------------------------------------
155155
if (ENABLE_TESTS)
156+
install(DIRECTORY ${SPHERAL_ROOT_DIR}/tests/
157+
USE_SOURCE_PERMISSIONS
158+
DESTINATION "${SPHERAL_TEST_INSTALL_PREFIX}"
159+
PATTERN "*CMakeLists.txt*" EXCLUDE
160+
PATTERN "*.cmake" EXCLUDE
161+
PATTERN "*.in" EXCLUDE
162+
PATTERN "*.pyc" EXCLUDE
163+
PATTERN "*~" EXCLUDE)
156164
add_subdirectory(${SPHERAL_ROOT_DIR}/tests/unit)
157-
158-
# A macro to preserve directory structure when installing files
159-
macro(install_with_directory)
160-
set(optionsArgs "")
161-
set(oneValueArgs SOURCE DESTINATION)
162-
set(multiValueArgs FILES)
163-
cmake_parse_arguments(CAS "${optionsArgs}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
164-
foreach(FILE ${CAS_FILES})
165-
get_filename_component(DIR ${FILE} DIRECTORY)
166-
INSTALL(FILES ${CAS_SOURCE}/${FILE} DESTINATION ${CAS_DESTINATION}/${DIR})
167-
endforeach()
168-
endmacro(install_with_directory)
169-
170-
# Find the test files we want to install
171-
set(test_files1 "")
172-
if (EXISTS "${CMAKE_SOURCE_DIR}/.git")
173-
execute_process(
174-
COMMAND git ls-files tests
175-
WORKING_DIRECTORY ${SPHERAL_ROOT_DIR}
176-
OUTPUT_VARIABLE test_files1)
177-
else()
178-
execute_process(
179-
COMMAND find tests -type f
180-
WORKING_DIRECTORY ${SPHERAL_ROOT_DIR}
181-
OUTPUT_VARIABLE test_files1)
182-
endif()
183-
string(REPLACE "\n" " " test_files ${test_files1})
184-
separate_arguments(test_files)
185-
list(REMOVE_ITEM test_files tests/unit/CXXTests/runCXXTests.ats)
186-
install_with_directory(
187-
FILES ${test_files}
188-
SOURCE ${SPHERAL_ROOT_DIR}
189-
DESTINATION ${SPHERAL_TEST_INSTALL_PREFIX})
190165
endif()
191166

192167
include(${SPHERAL_ROOT_DIR}/cmake/SpheralConfig.cmake)
+63-39
Original file line numberDiff line numberDiff line change
@@ -1,95 +1,119 @@
1-
Code Performance Diagnostics
2-
############################
1+
Code Debugging and Diagnostics
2+
##############################
33

4-
Spheral uses Caliper to preform code diagnostics, such as timing. To enable this functionality in the code, Spheral needs to be configured with ``ENABLE_TIMER=ON``. Otherwise, the timing regions are no-ops for improved preformance.
4+
Valgrind
5+
========
6+
7+
We advise using Valgrind to check for memory leaks when doing development on Spheral.
8+
When using Valgrind to check Spheral, be sure to use the provided suppression file
59
::
610

7-
./scripts/devtools/host-config-build.py <sys_type>-<spec>.cmake -DENABLE_TIMER=ON
11+
valgrind --suppressions=./scripts/devtools/valgrind_python_suppression ./spheral
812

913

10-
Querying using Caliper
11-
======================
14+
Using Caliper
15+
=============
16+
17+
Spheral uses Caliper to perform code diagnostics, such as timing. To enable this functionality in the code, Spheral needs to be configured with ``ENABLE_TIMER=ON``. Otherwise, the timing regions are no-ops for improved performance.
18+
::
19+
20+
./scripts/devtools/host-config-build.py <sys_type>-<spec>.cmake -DENABLE_TIMER=ON
1221

1322
Caliper is configured and started through the ``cali::ConfigManager``.
1423
The ``cali::ConfigManager`` is wrapped in a ``TimerMgr`` singleton class, which has a python interface.
1524

1625
.. note::
17-
``TimerMgr`` is initialized and started during ``commandLine()`` in ``src/SimulationControl/SpheralOptionParser.py``. This is because ``commandLine()`` is almost always invoked directly near the start of a problem. However, if ``commandLine()`` is not called, the timers would need to be configured and started directly using the ``TimerMgr`` class. See :ref:`below <manual_caliper>` for more details.
26+
``TimerMgr`` is initialized in ``src/SimulationControl/SpheralTimingParser.py`` which is called during ``commandLine()`` in ``src/SimulationControl/SpheralOptionParser.py``. This is because ``commandLine()`` is almost always invoked directly near the start of a problem. However, if ``commandLine()`` is not called, the timer manager would need to be configured and started directly using the ``TimerMgr`` class. See :ref:`below <manual_caliper>` for more details.
1827

1928
By default, the Caliper configuration is set to ``spot`` and outputs Caliper files (``.cali``).
20-
For the default configuration, the Caliper files are named based on what file is being run, for example:
21-
::
2229

23-
python Noh-cylindrical-2d.py
30+
There are many different Caliper configurations to view various information. Here are some extra links for those who want to read or experiment with other features in Caliper that can be incorporated into Spheral:
2431

25-
will produce a timing file called ``Noh-cylindrical-2d_YEAR_MONTH_DATE_TIME.cali`` where the file name includes the current date and time.
32+
* `Configuration basics <https://software.llnl.gov/Caliper/CaliperBasics.html#more-on-configurations>`_
33+
* `Builtin Configuration <https://software.llnl.gov/Caliper/BuiltinConfigurations.html>`_
34+
* `Manual Configuration <https://software.llnl.gov/Caliper/configuration.html>`_
35+
* `Output Format <https://software.llnl.gov/Caliper/OutputFormats.html>`_
2636

27-
The Caliper file name can be specified using the command line
28-
::
37+
Caliper and Adiak Options
38+
-------------------------
2939

30-
python Noh-cylindrical-2d.py --caliperFilename 'new_test_name.cali'
40+
.. option:: --caliperFilename
3141

32-
Different Caliper configurations can be set at the command line using ``--caliperConfig`` like so
33-
::
42+
Name of Caliper timing file. Should include file extensions. Optional, default: ``name_of_file_YEAR_MONTH_DATE_TIME.cali``.
3443

35-
python Noh-cylindrical-2d.py --caliperConfig 'runtime-report(output=time.txt),calc.inclusive,region.count'
44+
.. option:: --caliperConfig CONFIG_STR
3645

37-
.. note::
38-
The above configuration produces timing results similar to the previous ``Spheral::Timer`` method. This results in a file named ``time.txt`` with cumulative times for the nested regions as well as a count of how many times each region ran.
46+
Specify a built-in Caliper configuration or turn off timers with ``none``. Optional, default: ``spot``.
3947

40-
Similarly, a non-default Caliper configuration can be read in from a JSON file using ``--caliperConfigJSON`` and providing the file name.
41-
Lastly, Caliper timers can be turned off using ``--caliperConfig none``.
48+
**Example**:
49+
::
4250

43-
There are many different Caliper configurations to view various information. Here are some extra links for those who want to read or experiment with other features in Caliper that can be incorporated into Spheral:
51+
./spheral ex_prog.py --caliperConfig 'runtime-report(output=time.txt),calc.inclusive,region.count'
4452

45-
* `Configuration basics <https://software.llnl.gov/Caliper/CaliperBasics.html#more-on-configurations>`_
46-
* `Builtin Configuration <https://software.llnl.gov/Caliper/BuiltinConfigurations.html>`_
47-
* `Manual Configuration <https://software.llnl.gov/Caliper/configuration.html>`_
48-
* `Output Format <https://software.llnl.gov/Caliper/OutputFormats.html>`_
53+
.. note::
54+
The configuration in the example above produces timing results similar to the previous ``Spheral::Timer`` method. This results in a file named ``time.txt`` with cumulative times for the nested regions as well as a count of how many times each region ran.
4955

56+
.. option:: --caliperConfigJSON JSON_FILE
5057

51-
Adding Region Timers in C++
52-
===========================
58+
Specify a JSON file containing a non-default Caliper configuration. Optional.
5359

54-
So far there are two different types of regions in Spheral, using the following macros:
55-
::
60+
.. option:: --adiakData ADIAK_DATA_STR
5661

57-
TIME_FUNCTION
62+
Specify any Adiak data directly in the command line. Must be a string in key:value format, separated by commas. Optional.
5863

59-
or
64+
**Example**:
65+
::
6066

61-
::
67+
./spheral ex_prog.py --adiakData "test_name: the_cheat, test_num:10"
6268

63-
TIME_BEGIN("timer_name")
64-
TIME_END("timer_name")
69+
.. note::
70+
By default, all ``commandLine()`` inputs are added as Adiak metadata. ``--adiakData`` is for metadata that does not come through Spheral command line arguments. Adiak metadata can also be added through the python interface. See :ref:`below <python_adiak>` for more details.
6571

6672

73+
Adding Region Timers in C++
74+
---------------------------
75+
76+
The following macros are used to create timing regions in the Spheral C++ interface:
77+
6778
- ``TIME_FUNCTION`` can be added to the very beginning of a function and creates a region for the entire function using the function's name. ``TIME_FUNCTION`` uses just the function name and no class or parameter information, so be careful when using this method with functions that could share names.
6879

6980
- ``TIME_BEGIN("timer_name")`` and ``TIME_END("timer_name")`` create a region between the two different calls and use the string (in this case ``timer_name``) as the name.
7081

7182

7283
Adding Region Timers in Python
73-
==============================
84+
------------------------------
7485

7586
Region timers can be added inside the python code using the following function calls:
7687
::
7788

89+
from SpheralUtilities import TimerMgr
7890
TimerMgr.timer_start("timer_name")
7991
some_function_call()
8092
TimerMgr.timer_end("timer_name")
8193

8294
.. note::
83-
IMPORTANT: All timers must have both a start and end call. Otherwise, memory issues will occur.
95+
All timers must have both a start and end call. Otherwise, memory issues will occur.
96+
97+
.. _python_adiak:
98+
99+
Adding Adiak Metadata in Python
100+
-------------------------------
101+
102+
Adiak metadata can be added inside python code using the following function calls:
103+
104+
.. code-block:: python
105+
106+
adiak_values("value_name", value)
84107
85108
.. _manual_caliper:
86109

87110
Starting Caliper Manually
88-
========================
111+
=========================
89112

90-
As mentioned above, Caliper (not an individual Caliper timer) is normally configured and started in ``commandLine()`` python routine. However, Caliper can be directly configured and started through the python interface, if desired. This can be done by putting the following into the python file:
113+
As mentioned above, the Caliper timing manager is normally configured and started in the ``commandLine()`` routine. However, Caliper can be directly configured and started through the python interface, if desired. This can be done by putting the following into the python file:
91114
::
92115

116+
from SpheralUtilities import TimerMgr
93117
caliper_config = "some_configuration(output=some_filename.txt)"
94118
TimerMgr.add(caliper_config)
95119
TimerMgr.start()

0 commit comments

Comments
 (0)