diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake index bee6131c..2d151bf9 100644 --- a/cmake/CPM.cmake +++ b/cmake/CPM.cmake @@ -202,6 +202,60 @@ function(cpm_package_name_from_git_uri URI RESULT) endif() endfunction() +# Find the shortest hash that can be used eg, if origin_hash is +# cccb77ae9609d2768ed80dd42cec54f77b1f1455 the following files will be checked, until one is found +# that is either empty (allowing us to assign origin_hash), or whose contents matches ${origin_hash} +# +# * .../cccb.hash +# * .../cccb77ae.hash +# * .../cccb77ae9609.hash +# * .../cccb77ae9609d276.hash +# * etc +# +# We will be able to use a shorter path with very high probability, but in the (rare) event that the +# first couple characters collide, we will check longer and longer substrings. +function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var) + # for compatibility with caches populated by a previous version of CPM, check if a directory using + # the full hash already exists + if(EXISTS "${source_cache_dir}/${origin_hash}") + set(${short_hash_output_var} + "${origin_hash}" + PARENT_SCOPE + ) + return() + endif() + + foreach(len RANGE 4 40 4) + string(SUBSTRING "${origin_hash}" 0 ${len} short_hash) + set(hash_lock ${source_cache_dir}/${short_hash}.lock) + set(hash_fp ${source_cache_dir}/${short_hash}.hash) + # Take a lock, so we don't have a race condition with another instance of cmake. We will release + # this lock when we can, however, if there is an error, we want to ensure it gets released on + # it's own on exit from the function. + file(LOCK ${hash_lock} GUARD FUNCTION) + + # Load the contents of .../${short_hash}.hash + file(TOUCH ${hash_fp}) + file(READ ${hash_fp} hash_fp_contents) + + if(hash_fp_contents STREQUAL "") + # Write the origin hash + file(WRITE ${hash_fp} ${origin_hash}) + file(LOCK ${hash_lock} RELEASE) + break() + elseif(hash_fp_contents STREQUAL origin_hash) + file(LOCK ${hash_lock} RELEASE) + break() + else() + file(LOCK ${hash_lock} RELEASE) + endif() + endforeach() + set(${short_hash_output_var} + "${short_hash}" + PARENT_SCOPE + ) +endfunction() + # Try to infer package name and version from a url function(cpm_package_name_and_ver_from_url url outName outVer) if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)") @@ -806,9 +860,19 @@ function(CPMAddPackage) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY}) elseif(CPM_USE_NAMED_CACHE_DIRECTORIES) string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG") + cpm_get_shortest_hash( + "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory + "${origin_hash}" # Input hash + origin_hash # Computed hash + ) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME}) else() string(SHA1 origin_hash "${origin_parameters}") + cpm_get_shortest_hash( + "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory + "${origin_hash}" # Input hash + origin_hash # Computed hash + ) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}) endif() # Expand `download_directory` relative path. This is important because EXISTS doesn't work for diff --git a/cmake/testing.cmake b/cmake/testing.cmake index bbc1bf27..a46a9f52 100644 --- a/cmake/testing.cmake +++ b/cmake/testing.cmake @@ -79,3 +79,16 @@ function(ASSERT_NOT_EXISTS file) message(FATAL_ERROR "assertion failed: file ${file} exists") endif() endfunction() + +function(ASSERT_CONTENTS_EQUAL file content) + if(EXISTS ${file}) + file(READ ${file} file_content) + if(content STREQUAL file_content) + message(STATUS "test passed: '${file}' exists and contains '${content}'") + else() + message(FATAL_ERROR "assertion failed: file '${file}' does not contain expected content.") + endif() + else() + message(FATAL_ERROR "assertion failed: file '${file} does not exist") + endif() +endfunction() diff --git a/test/integration/test_source_cache.rb b/test/integration/test_source_cache.rb index ce2eb1d7..1d075be7 100644 --- a/test/integration/test_source_cache.rb +++ b/test/integration/test_source_cache.rb @@ -75,6 +75,17 @@ def check_package_cache(name, ver, dir_sha1) assert_equal ver, package.ver expected_parent_dir = File.join(@cache_dir, name.downcase) assert package.src_dir.start_with?(expected_parent_dir), "#{package.src_dir} must be in #{expected_parent_dir}" - assert_equal dir_sha1, File.basename(package.src_dir) + + # The hash has been shortened by cpm_get_shortest_hash. The following + # should hold: + # - The short hash should be a prefix of the input hash + # - There should be a file ".../${short_hash}.hash" which matches the full hash + short_hash = File.basename(package.src_dir) + assert dir_sha1.start_with?(short_hash), "short_hash should be a prefix of dir_sha1" + + # Check that the full hash is stored in the .hash file + hash_file = "#{package.src_dir}.hash" + assert File.exist?(hash_file), "Hash file #{hash_file} should exist" + assert_equal dir_sha1, File.read(hash_file), "Hash file should contain the full original hash" end end diff --git a/test/unit/get_shortest_hash.cmake b/test/unit/get_shortest_hash.cmake new file mode 100644 index 00000000..52bd2e92 --- /dev/null +++ b/test/unit/get_shortest_hash.cmake @@ -0,0 +1,93 @@ +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + +include(${CPM_PATH}/CPM.cmake) +include(${CPM_PATH}/testing.cmake) + +# Random suffix +string( + RANDOM + LENGTH 6 + ALPHABET "0123456789abcdef" tmpdir_suffix +) + +# Seconds since epoch +string(TIMESTAMP tmpdir_base "%s" UTC) + +set(tmp "${CMAKE_CURRENT_BINARY_DIR}/get_shortest_hash-${tmpdir_base}-${tmpdir_suffix}") + +if(IS_DIRECTORY ${tmp}) + message(FATAL_ERROR "Test directory ${tmp} already exists") +endif() + +file(MAKE_DIRECTORY "${tmp}") + +# 1. Sanity check: none of these directories should exist yet + +assert_not_exists(${tmp}/cccb.hash) +assert_not_exists(${tmp}/cccb77ae.hash) +assert_not_exists(${tmp}/cccb77ae9609.hash) +assert_not_exists(${tmp}/cccb77ae9608.hash) +assert_not_exists(${tmp}/cccb77be.hash) + +# 1. The directory is empty, so it should get a 4-character hash +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1455" hash) +assert_equal(${hash} "cccb") +assert_contents_equal(${tmp}/cccb.hash cccb77ae9609d2768ed80dd42cec54f77b1f1455) + +# 1. Calling the function with a new hash that differs subtly should result in more characters being +# used, enough to uniquely identify the hash + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1456" hash) +assert_equal(${hash} "cccb77ae") +assert_contents_equal(${tmp}/cccb77ae.hash cccb77ae9609d2768ed80dd42cec54f77b1f1456) + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1457" hash) +assert_equal(${hash} "cccb77ae9609") +assert_contents_equal(${tmp}/cccb77ae9609.hash cccb77ae9609d2768ed80dd42cec54f77b1f1457) + +cpm_get_shortest_hash(${tmp} "cccb77ae9608d2768ed80dd42cec54f77b1f1455" hash) +assert_equal(${hash} "cccb77ae9608") +assert_contents_equal(${tmp}/cccb77ae9608.hash cccb77ae9608d2768ed80dd42cec54f77b1f1455) + +cpm_get_shortest_hash(${tmp} "cccb77be9609d2768ed80dd42cec54f77b1f1456" hash) +assert_equal(${hash} "cccb77be") +assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) + +# check that legacy hashs are recognized +file(MAKE_DIRECTORY "${tmp}/cccb77be9609d2768ed80dd42cec54f77b1f1457") +cpm_get_shortest_hash(${tmp} "cccb77be9609d2768ed80dd42cec54f77b1f1457" hash) +assert_equal(${hash} "cccb77be9609d2768ed80dd42cec54f77b1f1457") + +# 1. The old file should still exist, and have the same content +assert_contents_equal(${tmp}/cccb.hash cccb77ae9609d2768ed80dd42cec54f77b1f1455) +assert_contents_equal(${tmp}/cccb77ae.hash cccb77ae9609d2768ed80dd42cec54f77b1f1456) +assert_contents_equal(${tmp}/cccb77ae9609.hash cccb77ae9609d2768ed80dd42cec54f77b1f1457) +assert_contents_equal(${tmp}/cccb77ae9608.hash cccb77ae9608d2768ed80dd42cec54f77b1f1455) +assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) + +# 1. Confirm idempotence: calling any of these function should produce the same hash as before (hash +# lookups work correctly once the .hash files are created) + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1455" hash) +assert_equal(${hash} "cccb") +assert_contents_equal(${tmp}/cccb.hash cccb77ae9609d2768ed80dd42cec54f77b1f1455) + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1456" hash) +assert_equal(${hash} "cccb77ae") +assert_contents_equal(${tmp}/cccb77ae.hash cccb77ae9609d2768ed80dd42cec54f77b1f1456) + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1457" hash) +assert_equal(${hash} "cccb77ae9609") +assert_contents_equal(${tmp}/cccb77ae9609.hash cccb77ae9609d2768ed80dd42cec54f77b1f1457) + +cpm_get_shortest_hash(${tmp} "cccb77ae9608d2768ed80dd42cec54f77b1f1455" hash) +assert_equal(${hash} "cccb77ae9608") +assert_contents_equal(${tmp}/cccb77ae9608.hash cccb77ae9608d2768ed80dd42cec54f77b1f1455) + +cpm_get_shortest_hash(${tmp} "cccb77be9609d2768ed80dd42cec54f77b1f1456" hash) +assert_equal(${hash} "cccb77be") +assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) + +# 1. Cleanup - remove the temporary directory that we created + +file(REMOVE_RECURSE ${tmp})