Skip to content

Commit 371d6cc

Browse files
kevinburke and claude committed
extract, list: auto-detect and decompress gzip archives
Previously, extract and list passed the raw file bytes directly to the tar parser without decompression. When given a .tar.gz file, the compressed gzip stream was interpreted as tar headers, producing errors like "numeric field did not have utf-8 text" on the checksum field.

Detect gzip compression by reading the two-byte magic number (0x1f 0x8b) at the start of the file, and wrap the reader in a GzDecoder when present. Plain .tar files continue to work as before.

Confirmed this patch allows extraction of Go source code from https://go.dev/dl/ (previously we would get an error).

Fixes #158.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 78c666a commit 371d6cc

8 files changed

Lines changed: 131 additions & 6 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ tar = { optional = true, version = "0.0.1", package = "uu_tar", path = "src/uu/t
6767

6868
[dev-dependencies]
6969
chrono = { workspace = true }
70+
flate2 = "1"
7071
libc = { workspace = true }
7172
pretty_assertions = "1"
7273
rand = { workspace = true }

src/uu/tar/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ regex = { workspace = true }
1919
tar = { workspace = true }
2020
chrono = { workspace = true }
2121
thiserror = { workspace = true }
22+
flate2 = "1"
2223

2324
[lib]
2425
path = "src/tar.rs"

src/uu/tar/src/compression.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// This file is part of the uutils tar package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use std::fs::File;
7+
use std::io::{Read, Seek};
8+
9+
/// Check whether a file is gzip-compressed.
///
/// Gzip files always start with the two-byte signature 0x1f 0x8b.
/// This reads the first two bytes and then seeks back to the
/// beginning so the caller can read the file from the start.
///
/// Returns `false` when the file holds fewer than two bytes or when the
/// header cannot be read at all.
///
/// The rewind is best-effort: the `bool` return type cannot report a seek
/// failure (for example on a non-seekable input), so in that case the
/// caller continues from wherever the read left the cursor.
pub fn is_gzip(file: &mut File) -> bool {
    let mut magic = [0u8; 2];
    // `read_exact` keeps reading after a short read and retries on
    // `ErrorKind::Interrupted`; a single `read` call may legitimately return
    // fewer than two bytes, which would misclassify a gzip archive as plain tar.
    let have_header = file.read_exact(&mut magic).is_ok();
    // Rewind unconditionally: after a failed `read_exact` the cursor position
    // is unspecified, and the caller expects to read from offset 0.
    let _ = file.seek(std::io::SeekFrom::Start(0));
    have_header && magic == [0x1f, 0x8b]
}

src/uu/tar/src/operations/extract.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6+
use crate::compression;
67
use crate::errors::TarError;
8+
use flate2::read::GzDecoder;
79
use std::fs::File;
10+
use std::io::{BufReader, Read};
811
use std::path::Path;
912
use tar::Archive;
1013
use uucore::error::UResult;
@@ -23,11 +26,19 @@ use uucore::error::UResult;
2326
/// - The archive format is invalid
2427
/// - Files cannot be extracted due to I/O or permission errors
2528
pub fn extract_archive(archive_path: &Path, verbose: bool) -> UResult<()> {
26-
// Open the archive file
27-
let file = File::open(archive_path).map_err(|e| TarError::from_io_error(e, archive_path))?;
29+
let mut file =
30+
File::open(archive_path).map_err(|e| TarError::from_io_error(e, archive_path))?;
2831

29-
// Create Archive instance
30-
let mut archive = Archive::new(file);
32+
if compression::is_gzip(&mut file) {
33+
let reader = BufReader::new(GzDecoder::new(file));
34+
return extract_from_reader(reader, archive_path, verbose);
35+
}
36+
37+
extract_from_reader(file, archive_path, verbose)
38+
}
39+
40+
fn extract_from_reader<R: Read>(reader: R, archive_path: &Path, verbose: bool) -> UResult<()> {
41+
let mut archive = Archive::new(reader);
3142

3243
// Extract to current directory
3344
if verbose {

src/uu/tar/src/operations/list.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,32 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6+
use crate::compression;
67
use crate::errors::TarError;
78
use chrono::{TimeZone, Utc};
9+
use flate2::read::GzDecoder;
810
use std::fs::File;
11+
use std::io::{BufReader, Read};
912
use std::path::Path;
1013
use tar::Archive;
1114
use uucore::error::UResult;
1215
use uucore::fs::display_permissions_unix;
1316

1417
/// List the contents of a tar archive, printing one entry per line.
1518
pub fn list_archive(archive_path: &Path, verbose: bool) -> UResult<()> {
16-
let file: File =
19+
let mut file: File =
1720
File::open(archive_path).map_err(|e| TarError::from_io_error(e, archive_path))?;
18-
let mut archive = Archive::new(file);
21+
22+
if compression::is_gzip(&mut file) {
23+
let reader = BufReader::new(GzDecoder::new(file));
24+
return list_from_reader(reader, verbose);
25+
}
26+
27+
list_from_reader(file, verbose)
28+
}
29+
30+
fn list_from_reader<R: Read>(reader: R, verbose: bool) -> UResult<()> {
31+
let mut archive = Archive::new(reader);
1932

2033
for entry_result in archive.entries().map_err(TarError::CannotReadEntries)? {
2134
let entry = entry_result.map_err(TarError::CannotReadEntry)?;

src/uu/tar/src/tar.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6+
pub mod compression;
67
pub mod errors;
78
mod operations;
89

tests/by-util/test_tar.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6+
use std::io::Write;
67
use std::path::{self, PathBuf};
78

89
use uutests::{at_and_ucmd, new_ucmd};
@@ -752,3 +753,79 @@ fn test_list_conflicts_with_extract() {
752753
.code_is(2)
753754
.stderr_contains("cannot be used with");
754755
}
756+
757+
// Gzip-compressed archive tests
758+
759+
// Extracting (`-xf`) a gzip-compressed archive must succeed and recreate the
// archived file with its original content.
#[test]
760+
fn test_extract_gzip_archive() {
761+
// Only the fixture directory handle is needed here; the command returned by
// the macro is unused and a fresh one is created with `new_ucmd!()` below.
let (at, _ucmd) = at_and_ucmd!();
762+
763+
// Build a .tar.gz in memory: tar containing one file, then gzip-compress it
764+
let mut tar_bytes = Vec::new();
765+
// Inner scope: the builder mutably borrows `tar_bytes`, and that borrow must
// end before the bytes are read again for compression.
{
766+
let mut builder = tar_rs_crate::Builder::new(&mut tar_bytes);
767+
let content = b"hello from gzip";
768+
let mut header = tar_rs_crate::Header::new_gnu();
769+
header.set_path("gzfile.txt").unwrap();
770+
header.set_size(content.len() as u64);
771+
header.set_mode(0o644);
772+
// The checksum is set last, after path, size and mode have been filled in.
header.set_cksum();
773+
builder.append(&header, &content[..]).unwrap();
774+
builder.finish().unwrap();
775+
}
776+
777+
// Gzip-compress the tar bytes
778+
let mut gz_bytes = Vec::new();
779+
// Same scoping as above: the encoder mutably borrows `gz_bytes`.
{
780+
let mut encoder =
781+
flate2::write::GzEncoder::new(&mut gz_bytes, flate2::Compression::default());
782+
encoder.write_all(&tar_bytes).unwrap();
783+
encoder.finish().unwrap();
784+
}
785+
786+
// Write the compressed archive into the test fixture directory.
at.write_bytes("archive.tar.gz", &gz_bytes);
787+
788+
// Extract using our tar implementation
789+
new_ucmd!()
790+
.arg("-xf")
791+
.arg(at.plus("archive.tar.gz"))
792+
// Run from the fixture directory: the file is asserted to exist there below.
.current_dir(at.as_string())
793+
.succeeds();
794+
795+
// The archived file must exist after extraction with the exact original content.
assert!(at.file_exists("gzfile.txt"));
796+
assert_eq!(at.read("gzfile.txt"), "hello from gzip");
797+
}
798+
799+
// Listing (`-tf`) a gzip-compressed archive must succeed and print the name of
// the archived entry.
#[test]
800+
fn test_list_gzip_archive() {
801+
// Only the fixture directory handle is needed here; the command returned by
// the macro is unused and a fresh one is created with `new_ucmd!()` below.
let (at, _ucmd) = at_and_ucmd!();
802+
803+
// Build a .tar.gz in memory
804+
let mut tar_bytes = Vec::new();
805+
// Inner scope: the builder mutably borrows `tar_bytes`, and that borrow must
// end before the bytes are read again for compression.
{
806+
let mut builder = tar_rs_crate::Builder::new(&mut tar_bytes);
807+
let content = b"list test content";
808+
let mut header = tar_rs_crate::Header::new_gnu();
809+
header.set_path("listed.txt").unwrap();
810+
header.set_size(content.len() as u64);
811+
header.set_mode(0o644);
812+
// The checksum is set last, after path, size and mode have been filled in.
header.set_cksum();
813+
builder.append(&header, &content[..]).unwrap();
814+
builder.finish().unwrap();
815+
}
816+
817+
// Gzip-compress the tar bytes; the encoder mutably borrows `gz_bytes`, hence
// the inner scope.
let mut gz_bytes = Vec::new();
818+
{
819+
let mut encoder =
820+
flate2::write::GzEncoder::new(&mut gz_bytes, flate2::Compression::default());
821+
encoder.write_all(&tar_bytes).unwrap();
822+
encoder.finish().unwrap();
823+
}
824+
825+
// Write the compressed archive into the test fixture directory.
at.write_bytes("archive.tar.gz", &gz_bytes);
826+
827+
// List the archive by path and check that the entry name appears on stdout.
new_ucmd!()
828+
.args(&["-tf", &at.plus_as_string("archive.tar.gz")])
829+
.succeeds()
830+
.stdout_contains("listed.txt");
831+
}

0 commit comments

Comments
 (0)