Skip to content

Commit 8fceb72

Browse files
committed
Download check for cache directory
Add a `.download` marker file to validate the contents of cache directories. Previously only the existence of the directory was checked, so if a download was aborted, the cache directory had to be deleted manually (typically after a cryptic error message). If the `.download` marker file does not exist, the directory is deleted and downloaded again. It is also possible to check the contents with a checksum; if it does not match, the directory is deleted and downloaded again. For Git repositories, the cache directory can be deleted when its status is not clean; a checksum is not needed there (but one is used in the tests).
1 parent 0bc73f4 commit 8fceb72

File tree

4 files changed

+313
-6
lines changed

4 files changed

+313
-6
lines changed

README.md

+14
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,12 @@ You can use `CPM_SOURCE_CACHE` on GitHub Actions workflows [cache](https://githu
196196
The directory where the version for a project is stored is by default the hash of the arguments to `CPMAddPackage()`.
197197
If for instance the patch command uses external files, the directory name can be set with the argument `CUSTOM_CACHE_KEY`.
198198

199+
The integrity of the downloaded content can be checked with a checksum by passing a [checksum command](test/unit/checksum_directory.sh) to `CPMAddPackage()` with the `CUSTOM_CACHE_CHECKSUM_COMMAND` argument.
200+
Checksum validation can be done in two ways:
201+
202+
- Setting the option `CPM_CHECK_CACHE_CHECKSUM` to validate against the checksum that was calculated when the project was downloaded.
203+
- Providing the expected checksum in the call to `CPMAddPackage()` with the `CUSTOM_CACHE_CHECKSUM_VALUE` argument.
204+
199205
### CPM_DOWNLOAD_ALL
200206

201207
If set, CPM will forward all calls to `CPMFindPackage` as `CPMAddPackage`.
@@ -219,6 +225,14 @@ Note that this does not apply to dependencies that have been defined with a trut
219225
If set, CPM uses an additional directory level in the cache to improve the readability of package names in IDEs like CLion. This changes the cache structure, so all dependencies are downloaded again. Both structures can be mixed in one cache directory without problems, but there may then be two copies of some dependencies.
220226
This can also be set as an environmental variable.
221227

228+
### CPM_CHECK_CACHE_CHECKSUM
229+
230+
Enable validation of the checksum for a cache directory if a command to checksum the directory is provided. The cache is validated against the supplied checksum if one is provided, otherwise against the checksum recorded when the dependency was downloaded.
231+
232+
If `GIT_TAG` is set, the status of the cached repository is checked with `git status`; a checksum command is not required for this check.
233+
234+
If the check fails, an existing directory will be deleted and downloaded again.
235+
222236
## Local package override
223237

224238
Library developers are often in the situation where they work on a locally checked out dependency at the same time as on a consumer project.

cmake/CPM.cmake

+91-6
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ option(CPM_USE_NAMED_CACHE_DIRECTORIES
129129
"Use additional directory of package name in cache on the most nested level."
130130
$ENV{CPM_USE_NAMED_CACHE_DIRECTORIES}
131131
)
132+
option(
133+
CPM_CHECK_CACHE_CHECKSUM
134+
"If a package is stored in cache and there is a command to provide checksum, check the checksum when the cache dir exists."
135+
$ENV{CPM_CHECK_CACHE_CHECKSUM}
136+
)
132137

133138
set(CPM_VERSION
134139
${CURRENT_CPM_VERSION}
@@ -601,9 +606,10 @@ function(CPMAddPackage)
601606
EXCLUDE_FROM_ALL
602607
SOURCE_SUBDIR
603608
CUSTOM_CACHE_KEY
609+
CUSTOM_CACHE_CHECKSUM_VALUE
604610
)
605611

606-
set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND PATCHES)
612+
set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND PATCHES CUSTOM_CACHE_CHECKSUM_COMMAND)
607613

608614
cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
609615

@@ -789,15 +795,69 @@ function(CPMAddPackage)
789795
get_filename_component(download_directory ${download_directory} ABSOLUTE)
790796
list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${download_directory})
791797

792-
if(CPM_SOURCE_CACHE)
793-
file(LOCK ${download_directory}/../cmake.lock)
798+
file(LOCK ${download_directory}/../cmake.lock)
799+
800+
if(EXISTS ${download_directory} AND NOT EXISTS ${download_directory}.download)
801+
message(
802+
WARNING
803+
"Cache for ${CPM_ARGS_NAME} is missing .download, downloading. (${download_directory}.download)"
804+
)
805+
file(REMOVE_RECURSE ${download_directory})
794806
endif()
795807

796-
if(EXISTS ${download_directory})
797-
if(CPM_SOURCE_CACHE)
798-
file(LOCK ${download_directory}/../cmake.lock RELEASE)
808+
# Validate an existing cache directory with the user supplied checksum command.
#
# The check runs only when the cache directory exists, a checksum command was passed to
# CPMAddPackage() and either CPM_CHECK_CACHE_CHECKSUM is enabled or an explicit
# CUSTOM_CACHE_CHECKSUM_VALUE was given. The expected value is the explicit value if present,
# otherwise the content of the `<download_directory>.download` marker file. On mismatch the cache
# directory is removed so that it is downloaded again further down.
if(EXISTS "${download_directory}"
   AND CPM_ARGS_CUSTOM_CACHE_CHECKSUM_COMMAND
   AND (CPM_CHECK_CACHE_CHECKSUM OR DEFINED CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE)
)
  # Compare against the empty string instead of relying on truthiness: a checksum such as "0" is a
  # valid value but evaluates to false in if(<variable>).
  if(NOT "${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE}" STREQUAL "")
    # Explicit checksum provided, ignore the value stored in the .download marker
    set(expected_checksum "${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE}")
  else()
    file(READ "${download_directory}.download" expected_checksum)
    string(STRIP "${expected_checksum}" expected_checksum)
  endif()

  if(NOT "${expected_checksum}" STREQUAL "")
    # Check the exit status explicitly so that a failing checksum command is reported on every
    # CMake version (COMMAND_ERROR_IS_FATAL only exists since CMake 3.19). Otherwise the empty
    # output would be treated as a mismatch and the cache deleted.
    execute_process(
      COMMAND ${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_COMMAND}
      WORKING_DIRECTORY "${download_directory}"
      RESULT_VARIABLE checksum_result
      OUTPUT_VARIABLE checksum
      OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    if(NOT "${checksum_result}" STREQUAL "0")
      message(
        FATAL_ERROR
          "Checksum command failed for ${CPM_ARGS_NAME} in ${download_directory} (${checksum_result})"
      )
    endif()

    # Quoted operands are compared as plain strings and are never dereferenced as variable names
    if(NOT "${expected_checksum}" STREQUAL "${checksum}")
      message(
        WARNING
          "Checksum mismatch for ${CPM_ARGS_NAME}, removing (${expected_checksum} != ${checksum})"
      )
      file(REMOVE_RECURSE "${download_directory}")
    endif()
  else()
    message(
      WARNING
        "Checksum cannot be verified for ${CPM_ARGS_NAME}, no existing value (${expected_checksum})"
    )
  endif()
endif()
845+
if(EXISTS ${download_directory}
846+
AND DEFINED CPM_ARGS_GIT_TAG
847+
AND NOT (PATCH_COMMAND IN_LIST CPM_ARGS_UNPARSED_ARGUMENTS)
848+
)
849+
# warn if cache has been changed since checkout
850+
cpm_check_git_working_dir_is_clean(${download_directory} ${CPM_ARGS_GIT_TAG} IS_CLEAN)
851+
if(NOT ${IS_CLEAN})
852+
message(WARNING "${CPM_INDENT} Cache for ${CPM_ARGS_NAME} (${download_directory}) is dirty")
853+
if(CPM_CHECK_CACHE_CHECKSUM OR DEFINED CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE)
854+
file(REMOVE_RECURSE ${download_directory})
855+
endif()
856+
endif()
857+
endif()
858+
if(EXISTS ${download_directory})
859+
# Directory content is considered OK
860+
file(LOCK ${download_directory}/../cmake.lock RELEASE)
801861
cpm_store_fetch_properties(
802862
${CPM_ARGS_NAME} "${download_directory}"
803863
"${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build"
@@ -894,6 +954,31 @@ function(CPMAddPackage)
894954

895955
cpm_fetch_package("${CPM_ARGS_NAME}" ${DOWNLOAD_ONLY} populated ${CPM_ARGS_UNPARSED_ARGUMENTS})
896956
# After a package has been fetched into the source cache: calculate its checksum (if a checksum
# command was given), verify it against an explicitly supplied CUSTOM_CACHE_CHECKSUM_VALUE, write
# the `<download_directory>.download` marker file and release the cache lock.
#
# The marker content is the calculated checksum, or empty when no checksum command was given.
if(CPM_SOURCE_CACHE AND download_directory)
  if(${populated})
    set(checksum "")
    if(CPM_ARGS_CUSTOM_CACHE_CHECKSUM_COMMAND)
      # Check the exit status explicitly so that a failing checksum command is reported on every
      # CMake version (COMMAND_ERROR_IS_FATAL only exists since CMake 3.19) instead of recording
      # an empty checksum in the marker file.
      execute_process(
        COMMAND ${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_COMMAND}
        WORKING_DIRECTORY "${download_directory}"
        RESULT_VARIABLE checksum_result
        OUTPUT_VARIABLE checksum
        OUTPUT_STRIP_TRAILING_WHITESPACE
      )
      if(NOT "${checksum_result}" STREQUAL "0")
        message(
          FATAL_ERROR
            "Checksum command failed for ${CPM_ARGS_NAME} in ${download_directory} (${checksum_result})"
        )
      endif()

      # Compare as quoted strings: a value such as "0" must not be skipped by a truthiness test
      if(NOT "${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE}" STREQUAL ""
         AND NOT "${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE}" STREQUAL "${checksum}"
      )
        message(
          FATAL_ERROR
            "Checksum mismatch for ${CPM_ARGS_NAME} (${CPM_ARGS_CUSTOM_CACHE_CHECKSUM_VALUE} != ${checksum})"
        )
      endif()
    endif()
    # Quote path and content so that spaces or semicolons are written unchanged
    file(WRITE "${download_directory}.download" "${checksum}")
  endif()
  file(LOCK "${download_directory}/../cmake.lock" RELEASE)
endif()
899984
if(${populated} AND ${CMAKE_VERSION} VERSION_LESS "3.28.0")

test/unit/cache.cmake

+145
Original file line numberDiff line numberDiff line change
@@ -153,3 +153,148 @@ execute_process(
153153

154154
assert_equal(${ret} "0")
155155
assert_exists("${CPM_SOURCE_CACHE_DIR}/fibonacci/my_custom_unique_dir")
156+
157+
# Cache checksum
158+
159+
reset_test()
160+
set(FIBONACCI_VERSION 1.1)
161+
set(FIBONACCI_GIT_TAG "GIT_TAG e9ebf168ca0fffaa4ef8c6fefc6346aaa22f6ed5")
162+
set(TEST_CHECKSUM_DIR "${CPM_SOURCE_CACHE_DIR}/fibonacci/my_checksummed_dir")
163+
# Checksum the tests expect in the `.download` marker file; it is also passed to CPMAddPackage()
# as CUSTOM_CACHE_CHECKSUM_VALUE further down.
# NOTE(review): this literal equals the SHA-512 digest of empty input. A digest of real directory
# content should differ, so regenerate this value whenever checksum_directory.sh is changed.
set(TEST_CHECKSUM_VALUE
164+
cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e
165+
)
166+
167+
set(CHECKSUM_COMMAND "${CMAKE_CURRENT_LIST_DIR}/checksum_directory.sh")
168+
set(INCORRECT_CHECKSUM_RESULT "1")
169+
set(IGNORE_CHECKSUM_TEST)
170+
if(CMAKE_HOST_WIN32)
171+
# checksum example is not adapted to Windows (Cygwin and msys could work though)
172+
set(CHECKSUM_COMMAND "")
173+
set(TEST_CHECKSUM_VALUE)
174+
set(IGNORE_CHECKSUM_TEST True)
175+
elseif(CMAKE_HOST_APPLE)
176+
set(TEST_CHECKSUM_VALUE
177+
cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e
178+
)
179+
endif()
180+
181+
# OK download
182+
183+
set(FIBONACCI_PACKAGE_ARGS
184+
"${FIBONACCI_GIT_TAG} CUSTOM_CACHE_KEY my_checksummed_dir CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\""
185+
)
186+
update_cmake_lists()
187+
188+
execute_process(
189+
COMMAND ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}" ${CMAKE_COMMAND}
190+
"-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}" RESULT_VARIABLE ret
191+
)
192+
193+
assert_equal(${ret} "0")
194+
assert_exists("${TEST_CHECKSUM_DIR}.download")
195+
file(READ "${TEST_CHECKSUM_DIR}.download" chksum)
196+
assert_equal("${chksum}" "${TEST_CHECKSUM_VALUE}")
197+
198+
# Test download again if .download file is missing
199+
200+
file(REMOVE "${TEST_CHECKSUM_DIR}.download")
201+
file(REMOVE "${TEST_CHECKSUM_DIR}/include/fibonacci.h")
202+
203+
set(FIBONACCI_PACKAGE_ARGS
204+
"${FIBONACCI_GIT_TAG} CUSTOM_CACHE_KEY my_checksummed_dir CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\""
205+
)
206+
update_cmake_lists()
207+
208+
execute_process(
209+
COMMAND ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}" ${CMAKE_COMMAND}
210+
"-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}" RESULT_VARIABLE ret
211+
)
212+
213+
assert_equal(${ret} "0")
214+
assert_exists("${TEST_CHECKSUM_DIR}.download")
215+
assert_exists("${TEST_CHECKSUM_DIR}/include/fibonacci.h")
216+
217+
# check checksum for download
218+
219+
set(FIBONACCI_PACKAGE_ARGS
220+
"${FIBONACCI_GIT_TAG} CUSTOM_CACHE_KEY my_checksummed_dir CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\""
221+
)
222+
update_cmake_lists()
223+
224+
execute_process(
225+
COMMAND ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}" ${CMAKE_COMMAND}
226+
"-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}" RESULT_VARIABLE ret
227+
)
228+
229+
assert_equal(${ret} "0")
230+
231+
# check checksum for download, provided
232+
233+
set(FIBONACCI_PACKAGE_ARGS
234+
"${FIBONACCI_GIT_TAG} CUSTOM_CACHE_KEY my_checksummed_dir CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\" CUSTOM_CACHE_CHECKSUM_VALUE ${TEST_CHECKSUM_VALUE}"
235+
)
236+
update_cmake_lists()
237+
238+
execute_process(
239+
COMMAND ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}" ${CMAKE_COMMAND}
240+
"-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}" RESULT_VARIABLE ret
241+
)
242+
243+
assert_equal(${ret} "0")
244+
245+
# check checksum for download, provided incorrect, this will print a fatal_error (red) error to the
246+
# console
247+
248+
set(FIBONACCI_PACKAGE_ARGS
249+
"${FIBONACCI_GIT_TAG} CUSTOM_CACHE_KEY my_checksummed_dir CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\" CUSTOM_CACHE_CHECKSUM_VALUE invalid_checksum_value"
250+
)
251+
update_cmake_lists()
252+
253+
execute_process(
254+
COMMAND ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}" ${CMAKE_COMMAND}
255+
"-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}" RESULT_VARIABLE ret
256+
)
257+
258+
if(NOT IGNORE_CHECKSUM_TEST)
259+
assert_equal(${ret} "1")
260+
endif()
261+
262+
# redownload when checksum is changed
263+
264+
set(FIBONACCI_PACKAGE_ARGS
265+
"${FIBONACCI_GIT_TAG} CUSTOM_CACHE_KEY my_checksummed_dir CUSTOM_CACHE_CHECKSUM_COMMAND \"${CHECKSUM_COMMAND}\" CUSTOM_CACHE_CHECKSUM_VALUE ${TEST_CHECKSUM_VALUE}"
266+
)
267+
update_cmake_lists()
268+
269+
# dummy change, to trigger checksum mismatch
270+
file(WRITE "${TEST_CHECKSUM_DIR}/fail_checksum.txt" "dummy")
271+
272+
execute_process(
273+
COMMAND ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}" ${CMAKE_COMMAND}
274+
"-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}" RESULT_VARIABLE ret
275+
)
276+
277+
assert_equal(${ret} "0")
278+
if(NOT IGNORE_CHECKSUM_TEST)
279+
assert_not_exists("${TEST_CHECKSUM_DIR}/fail_checksum.txt")
280+
endif()
281+
282+
# redownload when checksum is changed
283+
284+
set(FIBONACCI_PACKAGE_ARGS
285+
"${FIBONACCI_GIT_TAG} CUSTOM_CACHE_KEY my_checksummed_dir CUSTOM_CACHE_CHECKSUM_VALUE ${TEST_CHECKSUM_VALUE}"
286+
)
287+
update_cmake_lists()
288+
289+
# dummy change, to trigger checksum mismatch
290+
file(WRITE "${TEST_CHECKSUM_DIR}/fail_checksum.txt" "dummy")
291+
292+
execute_process(
293+
COMMAND ${CMAKE_COMMAND} -E env "CPM_SOURCE_CACHE=${CPM_SOURCE_CACHE_DIR}" ${CMAKE_COMMAND}
294+
"-S${CMAKE_CURRENT_LIST_DIR}/remote_dependency" "-B${TEST_BUILD_DIR}" RESULT_VARIABLE ret
295+
)
296+
297+
assert_equal(${ret} "0")
298+
if(NOT IGNORE_CHECKSUM_TEST)
299+
assert_not_exists("${TEST_CHECKSUM_DIR}/fail_checksum.txt")
300+
endif()

test/unit/checksum_directory.sh

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env bash

# Script to checksum contents recursively in a directory
#
# Prints one SHA-512 hex digest for the directory on stdout. By default every regular file outside
# of .git directories is hashed, the per-file lines are sorted by path and the sorted list is
# hashed again. With -t the directory is packed with tar/zstd and the archive stream is hashed.
#
# NOTE: the expected checksum values in test/unit/cache.cmake were the digest of empty input,
# which is what this script printed while its xargs stage failed silently. They must be
# regenerated with this version of the script.

set -o errexit
set -o nounset
# Without pipefail a failing pipeline stage is masked by the later stages, which then print the
# digest of empty input with exit status 0
set -o pipefail

# Print the command line help
function usage {
  echo
  echo "Checksum the contents of a directory"
  echo "Usage: $0 [-d <directory>]"
  echo ""
  echo "  -d directory   Default '.'"
  echo "  -h             Help, this message"
  echo "  -t             Use alternative tar method (requires zstd binary)"
  echo "  -v             Verbose output"
}

dir=.
use_tar=
# sha512 is faster than sha256 for large files, sha1 is even faster
SHA_ALGORITHM=sha512sum
if [[ "$OSTYPE" == "darwin"* ]]; then
  # Some overrides required for macos
  SHA_ALGORITHM="shasum -a 512"
  # Use a function, not an alias: aliases are not expanded in non-interactive bash scripts
  function nproc {
    sysctl -n hw.logicalcpu
  }
fi

while getopts "d:htv" o; do
  case "${o}" in
    d)
      dir=${OPTARG}
      ;;
    h)
      usage
      exit 0
      ;;
    t)
      use_tar=1
      ;;
    v)
      set -x
      ;;
    *)
      echo "Incorrect argument switch"
      usage
      exit 1
      ;;
  esac
done
shift "$((OPTIND-1))"

# Quoted so that directories with spaces work
cd "$dir"
if [ -n "$use_tar" ]; then
  # This is faster for single threads but requires more memory and requires the separate zstd binary
  # For a 3 GB data this is 3s vs 'find' below: 5s (one thread) below, 2.5s with 28 threads, 0.7s with 100 files on each line
  # Without --fast, just ZSTD_CLEVEL=1 ZSTD_NBTHREADS=0 is about 6s
  tar -I "zstd --fast -1 -T0" -cf - . | $SHA_ALGORITHM | cut -f1 -d ' '
else
  # In general, there is no point in checksumming Git repos, filter .git here as this is used in tests
  # -P takes its value as a separate argument ('-P=N' is rejected by xargs as an invalid number).
  # The sort runs in the C locale so the order of the file list does not depend on the user's locale.
  find . \( -name .git -prune \) -o -type f -print0 | xargs -0 -n 100 -P "$(nproc)" $SHA_ALGORITHM | LC_ALL=C sort -k 2 | $SHA_ALGORITHM | cut -f1 -d ' '
fi

0 commit comments

Comments
 (0)