Skip to content

Commit c5ac80a

Browse files
committed
Auto merge of rust-lang#126930 - Xaeroxe:file-checksum-hint, r=<try>
Add unstable support for outputting file checksums for use in cargo. Adds an unstable option that appends file checksums and expected lengths to the end of the dep-info file so that `cargo` can read and use these values as an alternative to file mtimes. This PR powers the changes made in the cargo PR rust-lang/cargo#14137. Here's the tracking issue for the cargo feature: rust-lang/cargo#14136.
2 parents 3de0a7c + d80d73e commit c5ac80a

File tree

16 files changed

+340
-46
lines changed

16 files changed

+340
-46
lines changed

Cargo.lock

+26
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,12 @@ dependencies = [
237237
"object 0.35.0",
238238
]
239239

240+
[[package]]
241+
name = "arrayref"
242+
version = "0.3.7"
243+
source = "registry+https://github.com/rust-lang/crates.io-index"
244+
checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
245+
240246
[[package]]
241247
name = "arrayvec"
242248
version = "0.7.4"
@@ -356,6 +362,19 @@ version = "2.5.0"
356362
source = "registry+https://github.com/rust-lang/crates.io-index"
357363
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
358364

365+
[[package]]
366+
name = "blake3"
367+
version = "1.5.2"
368+
source = "registry+https://github.com/rust-lang/crates.io-index"
369+
checksum = "3d08263faac5cde2a4d52b513dadb80846023aade56fcd8fc99ba73ba8050e92"
370+
dependencies = [
371+
"arrayref",
372+
"arrayvec",
373+
"cc",
374+
"cfg-if",
375+
"constant_time_eq",
376+
]
377+
359378
[[package]]
360379
name = "block-buffer"
361380
version = "0.10.4"
@@ -842,6 +861,12 @@ dependencies = [
842861
"windows-sys 0.52.0",
843862
]
844863

864+
[[package]]
865+
name = "constant_time_eq"
866+
version = "0.3.0"
867+
source = "registry+https://github.com/rust-lang/crates.io-index"
868+
checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
869+
845870
[[package]]
846871
name = "core"
847872
version = "0.0.0"
@@ -4754,6 +4779,7 @@ dependencies = [
47544779
name = "rustc_span"
47554780
version = "0.0.0"
47564781
dependencies = [
4782+
"blake3",
47574783
"derivative",
47584784
"indexmap",
47594785
"itoa",

compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs

+1
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,7 @@ pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) ->
636636
rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
637637
rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
638638
rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
639+
rustc_span::SourceFileHashAlgorithm::Blake3 => llvm::ChecksumKind::None,
639640
};
640641
let hash_value = hex_encode(source_file.src_hash.hash_bytes());
641642

compiler/rustc_interface/src/interface.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -387,12 +387,13 @@ pub fn run_compiler<R: Send>(config: Config, f: impl FnOnce(&Compiler) -> R + Se
387387
let file_loader = config.file_loader.unwrap_or_else(|| Box::new(RealFileLoader));
388388
let path_mapping = config.opts.file_path_mapping();
389389
let hash_kind = config.opts.unstable_opts.src_hash_algorithm(&target);
390+
let checksum_hash_kind = config.opts.unstable_opts.checksum_hash_algorithm();
390391

391392
util::run_in_thread_pool_with_globals(
392393
&early_dcx,
393394
config.opts.edition,
394395
config.opts.unstable_opts.threads,
395-
SourceMapInputs { file_loader, path_mapping, hash_kind },
396+
SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind },
396397
|current_gcx| {
397398
// The previous `early_dcx` can't be reused here because it doesn't
398399
// impl `Send`. Creating a new one is fine.

compiler/rustc_interface/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// tidy-alphabetical-start
22
#![feature(decl_macro)]
3+
#![feature(iter_intersperse)]
34
#![feature(let_chains)]
45
#![feature(thread_spawn_unchecked)]
56
#![feature(try_blocks)]

compiler/rustc_interface/src/passes.rs

+92-14
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,13 @@ use rustc_session::output::{collect_crate_types, find_crate_name};
3232
use rustc_session::search_paths::PathKind;
3333
use rustc_session::{Limit, Session};
3434
use rustc_span::symbol::{sym, Symbol};
35-
use rustc_span::FileName;
35+
use rustc_span::{FileName, SourceFileHash, SourceFileHashAlgorithm};
3636
use rustc_target::spec::PanicStrategy;
3737
use rustc_trait_selection::traits;
3838

3939
use std::any::Any;
4040
use std::ffi::OsString;
41+
use std::fs::File;
4142
use std::io::{self, BufWriter, Write};
4243
use std::path::{Path, PathBuf};
4344
use std::sync::{Arc, LazyLock};
@@ -420,15 +421,23 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
420421
let result: io::Result<()> = try {
421422
// Build a list of files used to compile the output and
422423
// write Makefile-compatible dependency rules
423-
let mut files: Vec<String> = sess
424+
let mut files: Vec<(String, u64, Option<SourceFileHash>)> = sess
424425
.source_map()
425426
.files()
426427
.iter()
427428
.filter(|fmap| fmap.is_real_file())
428429
.filter(|fmap| !fmap.is_imported())
429-
.map(|fmap| escape_dep_filename(&fmap.name.prefer_local().to_string()))
430+
.map(|fmap| {
431+
(
432+
escape_dep_filename(&fmap.name.prefer_local().to_string()),
433+
fmap.source_len.0 as u64,
434+
fmap.checksum_hash,
435+
)
436+
})
430437
.collect();
431438

439+
let checksum_hash_algo = sess.opts.unstable_opts.checksum_hash_algorithm;
440+
432441
// Account for explicitly marked-to-track files
433442
// (e.g. accessed in proc macros).
434443
let file_depinfo = sess.psess.file_depinfo.borrow();
@@ -438,58 +447,115 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
438447
escape_dep_filename(&file.prefer_local().to_string())
439448
};
440449

450+
fn hash_iter_files<P: AsRef<Path>>(
451+
it: impl Iterator<Item = P>,
452+
checksum_hash_algo: Option<SourceFileHashAlgorithm>,
453+
) -> impl Iterator<Item = (P, u64, Option<SourceFileHash>)> {
454+
it.map(move |path| {
455+
match checksum_hash_algo.and_then(|algo| {
456+
File::open(path.as_ref())
457+
.and_then(|mut file| {
458+
SourceFileHash::new(algo, &mut file).map(|h| (file, h))
459+
})
460+
.and_then(|(file, h)| file.metadata().map(|m| (m.len(), h)))
461+
.map_err(|e| {
462+
tracing::error!(
463+
"failed to compute checksum, omitting it from dep-info {} {e}",
464+
path.as_ref().display()
465+
)
466+
})
467+
.ok()
468+
}) {
469+
Some((file_len, checksum)) => (path, file_len, Some(checksum)),
470+
None => (path, 0, None),
471+
}
472+
})
473+
}
474+
441475
// The entries will be used to declare dependencies beween files in a
442476
// Makefile-like output, so the iteration order does not matter.
443477
#[allow(rustc::potential_query_instability)]
444-
let extra_tracked_files =
445-
file_depinfo.iter().map(|path_sym| normalize_path(PathBuf::from(path_sym.as_str())));
478+
let extra_tracked_files = hash_iter_files(
479+
file_depinfo.iter().map(|path_sym| normalize_path(PathBuf::from(path_sym.as_str()))),
480+
checksum_hash_algo,
481+
);
446482
files.extend(extra_tracked_files);
447483

448484
// We also need to track used PGO profile files
449485
if let Some(ref profile_instr) = sess.opts.cg.profile_use {
450-
files.push(normalize_path(profile_instr.as_path().to_path_buf()));
486+
files.extend(hash_iter_files(
487+
iter::once(normalize_path(profile_instr.as_path().to_path_buf())),
488+
checksum_hash_algo,
489+
));
451490
}
452491
if let Some(ref profile_sample) = sess.opts.unstable_opts.profile_sample_use {
453-
files.push(normalize_path(profile_sample.as_path().to_path_buf()));
492+
files.extend(hash_iter_files(
493+
iter::once(normalize_path(profile_sample.as_path().to_path_buf())),
494+
checksum_hash_algo,
495+
));
454496
}
455497

456498
// Debugger visualizer files
457499
for debugger_visualizer in tcx.debugger_visualizers(LOCAL_CRATE) {
458-
files.push(normalize_path(debugger_visualizer.path.clone().unwrap()));
500+
files.extend(hash_iter_files(
501+
iter::once(normalize_path(debugger_visualizer.path.clone().unwrap())),
502+
checksum_hash_algo,
503+
));
459504
}
460505

461506
if sess.binary_dep_depinfo() {
462507
if let Some(ref backend) = sess.opts.unstable_opts.codegen_backend {
463508
if backend.contains('.') {
464509
// If the backend name contain a `.`, it is the path to an external dynamic
465510
// library. If not, it is not a path.
466-
files.push(backend.to_string());
511+
files.extend(hash_iter_files(
512+
iter::once(backend.to_string()),
513+
checksum_hash_algo,
514+
));
467515
}
468516
}
469517

470518
for &cnum in tcx.crates(()) {
471519
let source = tcx.used_crate_source(cnum);
472520
if let Some((path, _)) = &source.dylib {
473-
files.push(escape_dep_filename(&path.display().to_string()));
521+
files.extend(hash_iter_files(
522+
iter::once(escape_dep_filename(&path.display().to_string())),
523+
checksum_hash_algo,
524+
));
474525
}
475526
if let Some((path, _)) = &source.rlib {
476-
files.push(escape_dep_filename(&path.display().to_string()));
527+
files.extend(hash_iter_files(
528+
iter::once(escape_dep_filename(&path.display().to_string())),
529+
checksum_hash_algo,
530+
));
477531
}
478532
if let Some((path, _)) = &source.rmeta {
479-
files.push(escape_dep_filename(&path.display().to_string()));
533+
files.extend(hash_iter_files(
534+
iter::once(escape_dep_filename(&path.display().to_string())),
535+
checksum_hash_algo,
536+
));
480537
}
481538
}
482539
}
483540

484541
let write_deps_to_file = |file: &mut dyn Write| -> io::Result<()> {
485542
for path in out_filenames {
486-
writeln!(file, "{}: {}\n", path.display(), files.join(" "))?;
543+
writeln!(
544+
file,
545+
"{}: {}\n",
546+
path.display(),
547+
files
548+
.iter()
549+
.map(|(path, _file_len, _checksum_hash_algo)| path.as_str())
550+
.intersperse(" ")
551+
.collect::<String>()
552+
)?;
487553
}
488554

489555
// Emit a fake target for each input file to the compilation. This
490556
// prevents `make` from spitting out an error if a file is later
491557
// deleted. For more info see #28735
492-
for path in files {
558+
for (path, _file_len, _checksum_hash_algo) in &files {
493559
writeln!(file, "{path}:")?;
494560
}
495561

@@ -513,6 +579,18 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
513579
}
514580
}
515581

582+
// If caller requested this information, add special comments about source file checksums.
583+
// These are not necessarily the same checksums as was used in the debug files.
584+
if sess.opts.unstable_opts.checksum_hash_algorithm().is_some() {
585+
for (path, file_len, checksum_hash) in
586+
files.iter().filter_map(|(path, file_len, hash_algo)| {
587+
hash_algo.map(|hash_algo| (path, file_len, hash_algo))
588+
})
589+
{
590+
writeln!(file, "# checksum:{checksum_hash} file_len:{file_len} {path}")?;
591+
}
592+
}
593+
516594
Ok(())
517595
};
518596

compiler/rustc_interface/src/tests.rs

+2
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,12 @@ where
4848
let sysroot = filesearch::materialize_sysroot(sessopts.maybe_sysroot.clone());
4949
let target = rustc_session::config::build_target_config(&early_dcx, &sessopts, &sysroot);
5050
let hash_kind = sessopts.unstable_opts.src_hash_algorithm(&target);
51+
let checksum_hash_kind = sessopts.unstable_opts.checksum_hash_algorithm();
5152
let sm_inputs = Some(SourceMapInputs {
5253
file_loader: Box::new(RealFileLoader) as _,
5354
path_mapping: sessopts.file_path_mapping(),
5455
hash_kind,
56+
checksum_hash_kind,
5557
});
5658

5759
rustc_span::create_session_globals_then(DEFAULT_EDITION, sm_inputs, || {

compiler/rustc_metadata/src/rmeta/decoder.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1723,6 +1723,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
17231723
let rustc_span::SourceFile {
17241724
mut name,
17251725
src_hash,
1726+
checksum_hash,
17261727
start_pos: original_start_pos,
17271728
source_len,
17281729
lines,
@@ -1774,6 +1775,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
17741775
let local_version = sess.source_map().new_imported_source_file(
17751776
name,
17761777
src_hash,
1778+
checksum_hash,
17771779
stable_id,
17781780
source_len.to_u32(),
17791781
self.cnum,

compiler/rustc_query_system/src/ich/impls_syntax.rs

+2
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
6868
// Do not hash the source as it is not encoded
6969
src: _,
7070
ref src_hash,
71+
// Already includes src_hash, this is redundant
72+
checksum_hash: _,
7173
external_src: _,
7274
start_pos: _,
7375
source_len: _,

compiler/rustc_session/src/config.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1219,6 +1219,10 @@ impl UnstableOptions {
12191219
}
12201220
})
12211221
}
1222+
1223+
pub fn checksum_hash_algorithm(&self) -> Option<SourceFileHashAlgorithm> {
1224+
self.checksum_hash_algorithm
1225+
}
12221226
}
12231227

12241228
// The type of entry function, so users can have their own entry functions

compiler/rustc_session/src/options.rs

+21-1
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,8 @@ mod desc {
413413
pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
414414
pub const parse_symbol_mangling_version: &str =
415415
"one of: `legacy`, `v0` (RFC 2603), or `hashed`";
416-
pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
416+
pub const parse_cargo_src_file_hash: &str = "one of `sha256`";
417+
pub const parse_src_file_hash: &str = "one of `md5`, `sha1`, or `sha256`";
417418
pub const parse_relocation_model: &str =
418419
"one of supported relocation models (`rustc --print relocation-models`)";
419420
pub const parse_code_model: &str = "one of supported code models (`rustc --print code-models`)";
@@ -1261,6 +1262,23 @@ mod parse {
12611262
true
12621263
}
12631264

1265+
pub(crate) fn parse_cargo_src_file_hash(
1266+
slot: &mut Option<SourceFileHashAlgorithm>,
1267+
v: Option<&str>,
1268+
) -> bool {
1269+
match v.and_then(|s| SourceFileHashAlgorithm::from_str(s).ok()) {
1270+
Some(hash_kind) => {
1271+
if hash_kind.supported_in_cargo() {
1272+
*slot = Some(hash_kind);
1273+
} else {
1274+
return false;
1275+
}
1276+
}
1277+
_ => return false,
1278+
}
1279+
true
1280+
}
1281+
12641282
pub(crate) fn parse_target_feature(slot: &mut String, v: Option<&str>) -> bool {
12651283
match v {
12661284
Some(s) => {
@@ -1647,6 +1665,8 @@ options! {
16471665
"instrument control-flow architecture protection"),
16481666
check_cfg_all_expected: bool = (false, parse_bool, [UNTRACKED],
16491667
"show all expected values in check-cfg diagnostics (default: no)"),
1668+
checksum_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_cargo_src_file_hash, [TRACKED],
1669+
"hash algorithm of source files used to check freshness in cargo (`sha256`)"),
16501670
codegen_backend: Option<String> = (None, parse_opt_string, [TRACKED],
16511671
"the backend to use"),
16521672
combine_cgu: bool = (false, parse_bool, [TRACKED],

compiler/rustc_span/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ edition = "2021"
55

66
[dependencies]
77
# tidy-alphabetical-start
8+
blake3 = "1.5.2"
89
derivative = "2.2.0"
910
indexmap = { version = "2.0.0" }
1011
itoa = "1.0"

0 commit comments

Comments
 (0)