Skip to content

Commit b0a4aaf

Browse files
committed
Use shorter hashes with CPM_SOURCE_CACHE (cpm-cmake#624)
Uses shorter hashes with CPM_SOURCE_CACHE. Falls back to a longer hash if necessary (ie, if there's a collision with an existing hash). See: cpm-cmake#624
1 parent 196b1a7 commit b0a4aaf

File tree

2 files changed

+140
-0
lines changed

2 files changed

+140
-0
lines changed

cmake/CPM.cmake

+56
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,52 @@ function(cpm_package_name_from_git_uri URI RESULT)
198198
endif()
199199
endfunction()
200200

201+
202+
# Find the shortest prefix of a hash that can be used as a cache key.
#
# Prefixes of origin_hash with 4, 8, 12, ... up to 40 characters are tried in order. For
# each prefix <p>, the file <source_cache_dir>/<p>.hash records which full hash owns it:
#
# * if the file is empty, the prefix is free: origin_hash is written to it and <p> is used
# * if the file already contains origin_hash, <p> is used
# * otherwise the prefix is owned by a different hash and the next, longer prefix is tried
#
# eg, if origin_hash is cccb77ae9609d2768ed80dd42cec54f77b1f1455 the following files will
# be checked, in this order, until one of them can be used:
#
# * .../cccb.hash
# * .../cccb77ae.hash
# * .../cccb77ae9609.hash
# * .../cccb77ae9609d276.hash
# * etc
#
# We will be able to use a shorter path with very high probability, but in the (rare) event
# that the first couple of characters collide, we will check longer and longer substrings.
#
# Arguments:
#
# * source_cache_dir: directory that holds the <p>.hash and <p>.lock files
# * origin_hash: the full hash to shorten
# * short_hash_output_var: name of the variable, set in the caller's scope, that receives
#   the chosen prefix
#
# If none of the probed prefixes can be used, the full origin_hash is returned unchanged.
function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var)
  # Fallback result: never hand out a prefix that is owned by a different hash.
  set(selected_hash "${origin_hash}")

  foreach(len RANGE 4 40 4)
    string(SUBSTRING "${origin_hash}" 0 ${len} short_hash)
    set(hash_lock "${source_cache_dir}/${short_hash}.lock")
    set(hash_fp "${source_cache_dir}/${short_hash}.hash")

    # Take a lock, so we don't have a race condition with another instance of cmake. The
    # lock is released explicitly below; GUARD FUNCTION additionally ensures it gets
    # released on its own when the function exits, e.g. after an error.
    file(LOCK "${hash_lock}" GUARD FUNCTION)

    # Load the contents of .../${short_hash}.hash, creating the file if it is missing
    file(TOUCH "${hash_fp}")
    file(READ "${hash_fp}" hash_fp_contents)

    set(prefix_usable FALSE)
    if("${hash_fp_contents}" STREQUAL "")
      # Nobody owns this prefix yet: record the origin hash as its owner
      file(WRITE "${hash_fp}" "${origin_hash}")
      set(prefix_usable TRUE)
    elseif("${hash_fp_contents}" STREQUAL "${origin_hash}")
      # This prefix was assigned to the same hash by an earlier call
      set(prefix_usable TRUE)
    endif()

    file(LOCK "${hash_lock}" RELEASE)

    if(prefix_usable)
      set(selected_hash "${short_hash}")
      break()
    endif()
  endforeach()

  set(${short_hash_output_var}
      "${selected_hash}"
      PARENT_SCOPE
  )
endfunction()
245+
246+
201247
# Try to infer package name and version from a url
202248
function(cpm_package_name_and_ver_from_url url outName outVer)
203249
if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)")
@@ -779,9 +825,19 @@ function(CPMAddPackage)
779825
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY})
780826
elseif(CPM_USE_NAMED_CACHE_DIRECTORIES)
781827
string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG")
828+
cpm_get_shortest_hash(
829+
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
830+
"${origin_hash}" # Input hash
831+
origin_hash # Computed hash
832+
)
782833
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME})
783834
else()
784835
string(SHA1 origin_hash "${origin_parameters}")
836+
cpm_get_shortest_hash(
837+
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
838+
"${origin_hash}" # Input hash
839+
origin_hash # Computed hash
840+
)
785841
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
786842
endif()
787843
# Expand `download_directory` relative path. This is important because EXISTS doesn't work for

test/unit/get_shortest_hash.cmake

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# Unit test for cpm_get_shortest_hash: checks that the shortest free prefix is handed out,
# that colliding hashes receive longer prefixes, and that repeated calls are stable.

include(${CPM_PATH}/CPM.cmake)
include(${CPM_PATH}/testing.cmake)

# Random suffix
string(RANDOM LENGTH 6 ALPHABET "0123456789abcdef" tmpdir_suffix)

# Seconds since epoch
string(TIMESTAMP tmpdir_base "%s" UTC)

# Use an absolute path: the behaviour of if(IS_DIRECTORY) and if(EXISTS) is only
# well-defined for full paths.
set(tmp "${CMAKE_CURRENT_BINARY_DIR}/get_shortest_hash-${tmpdir_base}-${tmpdir_suffix}")

if(IS_DIRECTORY "${tmp}")
  message(FATAL_ERROR "Test directory ${tmp} already exists")
endif()

file(MAKE_DIRECTORY "${tmp}")

# 1. Sanity check: none of these files should exist yet

assert_not_exists("${tmp}/cccb.hash")
assert_not_exists("${tmp}/cccb77ae.hash")
assert_not_exists("${tmp}/cccb77ae9609.hash")
assert_not_exists("${tmp}/cccb77ae9608.hash")
assert_not_exists("${tmp}/cccb77be.hash")

# 2. The directory is empty, so it should get a 4-character hash
cpm_get_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1455" hash)
assert_equal("${hash}" "cccb")
assert_contents_equal("${tmp}/cccb.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1455)

# 3. Calling the function with a new hash that differs subtly should result in more
# characters being used, enough to uniquely identify the hash

cpm_get_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1456" hash)
assert_equal("${hash}" "cccb77ae")
assert_contents_equal("${tmp}/cccb77ae.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1456)

cpm_get_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1457" hash)
assert_equal("${hash}" "cccb77ae9609")
assert_contents_equal("${tmp}/cccb77ae9609.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1457)

cpm_get_shortest_hash("${tmp}" "cccb77ae9608d2768ed80dd42cec54f77b1f1455" hash)
assert_equal("${hash}" "cccb77ae9608")
assert_contents_equal("${tmp}/cccb77ae9608.hash" cccb77ae9608d2768ed80dd42cec54f77b1f1455)

cpm_get_shortest_hash("${tmp}" "cccb77be9609d2768ed80dd42cec54f77b1f1456" hash)
assert_equal("${hash}" "cccb77be")
assert_contents_equal("${tmp}/cccb77be.hash" cccb77be9609d2768ed80dd42cec54f77b1f1456)

# 4. The old files should still exist, and have the same content
assert_contents_equal("${tmp}/cccb.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1455)
assert_contents_equal("${tmp}/cccb77ae.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1456)
assert_contents_equal("${tmp}/cccb77ae9609.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1457)
assert_contents_equal("${tmp}/cccb77ae9608.hash" cccb77ae9608d2768ed80dd42cec54f77b1f1455)
assert_contents_equal("${tmp}/cccb77be.hash" cccb77be9609d2768ed80dd42cec54f77b1f1456)

# 5. Confirm idempotence: calling any of these functions again should produce the same
# hash as before (hash lookups work correctly once the .hash files are created)

cpm_get_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1455" hash)
assert_equal("${hash}" "cccb")
assert_contents_equal("${tmp}/cccb.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1455)

cpm_get_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1456" hash)
assert_equal("${hash}" "cccb77ae")
assert_contents_equal("${tmp}/cccb77ae.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1456)

cpm_get_shortest_hash("${tmp}" "cccb77ae9609d2768ed80dd42cec54f77b1f1457" hash)
assert_equal("${hash}" "cccb77ae9609")
assert_contents_equal("${tmp}/cccb77ae9609.hash" cccb77ae9609d2768ed80dd42cec54f77b1f1457)

cpm_get_shortest_hash("${tmp}" "cccb77ae9608d2768ed80dd42cec54f77b1f1455" hash)
assert_equal("${hash}" "cccb77ae9608")
assert_contents_equal("${tmp}/cccb77ae9608.hash" cccb77ae9608d2768ed80dd42cec54f77b1f1455)

cpm_get_shortest_hash("${tmp}" "cccb77be9609d2768ed80dd42cec54f77b1f1456" hash)
assert_equal("${hash}" "cccb77be")
assert_contents_equal("${tmp}/cccb77be.hash" cccb77be9609d2768ed80dd42cec54f77b1f1456)

# 6. Cleanup - remove the temporary directory that we created

file(REMOVE_RECURSE "${tmp}")

0 commit comments

Comments
 (0)