Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 16 additions & 9 deletions src/archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::io::{self, SeekFrom};
use std::marker;
use std::path::Path;

use crate::entry::{EntryFields, EntryIo};
use crate::entry::{EntryCursor, EntryFields, EntrySegmentKind};
use crate::error::TarError;
use crate::header::BLOCK_SIZE;
use crate::other;
Expand Down Expand Up @@ -39,7 +39,7 @@ pub struct Entries<'a, R: 'a + Read> {
_ignored: marker::PhantomData<&'a Archive<R>>,
}

trait SeekRead: Read + Seek {}
pub(crate) trait SeekRead: Read + Seek {}
impl<R: Read + Seek> SeekRead for R {}

struct EntriesFields<'a> {
Expand Down Expand Up @@ -345,11 +345,18 @@ impl<'a> EntriesFields<'a> {
size = pax_size;
}
}
let real_size = header.size()?;

let mut cursor = EntryCursor::default();
cursor.append_segment(EntrySegmentKind::Data, size, file_pos);

let ret = EntryFields {
size: size,
real_size: real_size,
header_pos: header_pos,
file_pos: file_pos,
data: vec![EntryIo::Data((&self.archive.inner).take(size))],
data: &self.archive.inner,
data_seekable: self.seekable_archive.map(|a| &a.inner),
header: header,
long_pathname: None,
long_linkname: None,
Expand All @@ -360,6 +367,7 @@ impl<'a> EntriesFields<'a> {
preserve_mtime: self.archive.inner.preserve_mtime,
overwrite: self.archive.inner.overwrite,
preserve_ownerships: self.archive.inner.preserve_ownerships,
cursor: cursor,
};

// Store where the next entry is, rounding up by 512 bytes (the size of
Expand Down Expand Up @@ -470,14 +478,14 @@ impl<'a> EntriesFields<'a> {
// the same as the current offset (described by the list of blocks) as
// well as the amount of data read equals the size of the entry
// (`Header::entry_size`).
entry.data.truncate(0);
entry.cursor.segments.truncate(0);

let mut cur = 0;
let mut remaining = entry.size;
{
let data = &mut entry.data;
let reader = &self.archive.inner;
let size = entry.size;
let file_pos = entry.file_pos;
let cursor = &mut entry.cursor;
let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
if block.is_empty() {
return Ok(());
Expand All @@ -495,8 +503,7 @@ impl<'a> EntriesFields<'a> {
blocks",
));
} else if cur < off {
let block = io::repeat(0).take(off - cur);
data.push(EntryIo::Pad(block));
cursor.append_segment(EntrySegmentKind::Pad, off, file_pos);
}
cur = off
.checked_add(len)
Expand All @@ -507,7 +514,7 @@ impl<'a> EntriesFields<'a> {
listed",
)
})?;
data.push(EntryIo::Data(reader.take(len)));
cursor.append_segment(EntrySegmentKind::Data, cur, file_pos);
Ok(())
};
for block in gnu.sparse.iter() {
Expand Down
139 changes: 114 additions & 25 deletions src/entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::path::{Component, Path, PathBuf};

use filetime::{self, FileTime};

use crate::archive::ArchiveInner;
use crate::archive::{ArchiveInner, SeekRead};
use crate::error::TarError;
use crate::header::bytes2path;
use crate::other;
Expand All @@ -18,8 +18,12 @@ use crate::{Archive, Header, PaxExtensions};
/// A read-only view into an entry of an archive.
///
/// This structure is a window into a portion of a borrowed archive which can
/// be inspected. It acts as a file handle by implementing the Reader trait. An
/// be inspected. It acts as a file handle by implementing the [Read] trait. An
/// entry cannot be rewritten once inserted into an archive.
///
/// Note that the [Seek] implementation for this type is only valid for values
/// obtained from [`Archive::entries_with_seek`]. Calling [Seek::seek] on a
/// value obtained otherwise will return an error.
pub struct Entry<'a, R: 'a + Read> {
fields: EntryFields<'a>,
_ignored: marker::PhantomData<&'a Archive<R>>,
Expand All @@ -34,19 +38,37 @@ pub struct EntryFields<'a> {
pub mask: u32,
pub header: Header,
pub size: u64,
pub real_size: u64,
pub header_pos: u64,
pub file_pos: u64,
pub data: Vec<EntryIo<'a>>,
pub data: &'a ArchiveInner<dyn Read + 'a>,
pub data_seekable: Option<&'a ArchiveInner<dyn SeekRead + 'a>>,
pub cursor: EntryCursor,
pub unpack_xattrs: bool,
pub preserve_permissions: bool,
pub preserve_ownerships: bool,
pub preserve_mtime: bool,
pub overwrite: bool,
}

pub enum EntryIo<'a> {
Pad(io::Take<io::Repeat>),
Data(io::Take<&'a ArchiveInner<dyn Read + 'a>>),
/// Read/seek state of a single entry, expressed in the entry's own logical
/// byte space (i.e. the bytes a reader of the entry observes).
#[derive(Debug, Default)]
pub struct EntryCursor {
    /// Current logical position within the entry.
    pub pos: u64,
    /// Segments making up the entry's contents, in logical order. Each
    /// segment starts where the previous one ended.
    pub segments: Vec<EntrySegment>,
    /// Index into `segments` of the segment reading resumes from. Equal to
    /// `segments.len()` once the entry is exhausted.
    pub cur_segment: usize,
}

/// One contiguous piece of an entry's logical contents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EntrySegment {
    /// Offset in the underlying archive associated with the start of this
    /// segment. For a `Pad` segment this is the offset at which the data
    /// following the padding begins, since padding occupies no archive bytes.
    pub file_off: u64,
    /// Logical offset (inclusive) within the entry where the segment begins.
    pub start: u64,
    /// Logical offset (exclusive) within the entry where the segment ends.
    pub end: u64,
    /// Whether the segment is backed by archive data or is synthesized.
    pub kind: EntrySegmentKind,
}

/// Origin of the bytes in an [`EntrySegment`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntrySegmentKind {
    /// Zero bytes that are not stored in the archive (a hole).
    Pad,
    /// Bytes read from the underlying archive.
    Data,
}

/// When unpacking items the unpacked thing is returned to allow custom
Expand Down Expand Up @@ -281,6 +303,12 @@ impl<'a, R: Read> Read for Entry<'a, R> {
}
}

impl<'a, R: Read + Seek> Seek for Entry<'a, R> {
    /// Repositions the read cursor within this entry.
    ///
    /// All work is delegated to the wrapped `EntryFields`, which reports an
    /// error when the entry was not produced by `Archive::entries_with_seek`.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        let fields = &mut self.fields;
        fields.seek(pos)
    }
}

impl<'a> EntryFields<'a> {
pub fn from<R: Read>(entry: Entry<R>) -> EntryFields {
entry.fields
Expand Down Expand Up @@ -659,21 +687,24 @@ impl<'a> EntryFields<'a> {
Err(err)
}
})?;
for io in self.data.drain(..) {
match io {
EntryIo::Data(mut d) => {
let expected = d.limit();
if io::copy(&mut d, &mut f)? != expected {
for seg in &self.cursor.segments[self.cursor.cur_segment..] {
let limit = seg.end - self.cursor.pos;
match seg.kind {
EntrySegmentKind::Data => {
let mut d = (&mut self.data).take(limit);
if io::copy(&mut d, &mut f)? != limit {
return Err(other("failed to write entire file"));
}
}
EntryIo::Pad(d) => {
EntrySegmentKind::Pad => {
// TODO: checked cast to i64
let to = SeekFrom::Current(d.limit() as i64);
let to = SeekFrom::Current(limit as i64);
let size = f.seek(to)?;
f.set_len(size)?;
}
}
self.cursor.pos += limit;
self.cursor.cur_segment += 1;
}
Ok(f)
})()
Expand Down Expand Up @@ -951,23 +982,81 @@ impl<'a> EntryFields<'a> {

impl<'a> Read for EntryFields<'a> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
loop {
match self.data.get_mut(0).map(|io| io.read(into)) {
Some(Ok(0)) => {
self.data.remove(0);
}
Some(r) => return r,
None => return Ok(0),
for seg in &self.cursor.segments[self.cursor.cur_segment..] {
let limit = seg.end - self.cursor.pos;
let n_read = match seg.kind {
EntrySegmentKind::Pad => io::repeat(0).take(limit).read(into),
EntrySegmentKind::Data => self.data.take(limit).read(into),
}?;
if n_read != 0 {
self.cursor.pos += n_read as u64;
return Ok(n_read);
}
self.cursor.cur_segment += 1;
}
Ok(0)
}
}

impl<'a> Read for EntryIo<'a> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
match *self {
EntryIo::Pad(ref mut io) => io.read(into),
EntryIo::Data(ref mut io) => io.read(into),
impl<'a> Seek for EntryFields<'a> {
    /// Repositions the logical read cursor of this entry.
    ///
    /// Offsets are expressed in the entry's own byte space, i.e. the bytes
    /// yielded by the `Read` implementation (padding segments included), not
    /// in offsets of the underlying archive.
    ///
    /// A target at or beyond the end of the entry is clamped to the end of
    /// the entry; subsequent reads then return `Ok(0)`.
    ///
    /// # Errors
    ///
    /// Returns an error if the entry has no seekable backing reader (it was
    /// not produced by `Archive::entries_with_seek`), if the requested
    /// position is negative or overflows `u64`, or if seeking the underlying
    /// reader fails.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        let mut data = self.data_seekable.ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::Other,
                "seeking only supported on entries produced from Archive::entries_with_seek",
            )
        })?;

        // The logical length is where `Read` reports EOF: the end of the last
        // segment. The header-derived `real_size` is deliberately not used
        // here because it can disagree with the segment list (for instance
        // when the entry size was taken from a PAX extension), which would
        // make `SeekFrom::End` inconsistent with reading to the end.
        let len = self.cursor.segments.last().map_or(0, |seg| seg.end);

        let target = match pos {
            SeekFrom::Start(n) => Some(n),
            SeekFrom::End(n) => len.checked_add_signed(n),
            SeekFrom::Current(n) => self.cursor.pos.checked_add_signed(n),
        }
        .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "seek pos overflow"))?;

        // Nothing to do; avoid touching the underlying reader.
        if target == self.cursor.pos {
            return Ok(target);
        }

        // First segment whose end lies strictly beyond `target`; zero-length
        // segments ending at `target` are skipped.
        let cur_segment = self.cursor.segments.partition_point(|s| s.end <= target);
        let Some(seg) = self.cursor.segments.get(cur_segment) else {
            // Target is at or past the end: park the cursor at the end. The
            // underlying reader is left where it is since no more data will
            // be read from it for this entry.
            self.cursor.pos = len;
            self.cursor.cur_segment = cur_segment;
            return Ok(len);
        };

        // Padding occupies no bytes in the archive, so for a `Pad` segment the
        // reader is positioned at the data that follows it.
        let file_pos = match seg.kind {
            EntrySegmentKind::Pad => seg.file_off,
            EntrySegmentKind::Data => seg.file_off + (target - seg.start),
        };
        data.seek(SeekFrom::Start(file_pos))?;

        self.cursor.pos = target;
        self.cursor.cur_segment = cur_segment;
        Ok(target)
    }
}

impl EntryCursor {
    /// Appends a segment of the given `kind` whose logical range ends at
    /// `end`.
    ///
    /// The new segment begins where the previous one ended (or at logical
    /// offset 0 when it is the first). Its archive offset is derived from the
    /// previous segment: a `Data` predecessor advances the offset by its
    /// length, a `Pad` predecessor leaves it unchanged. The first segment uses
    /// `entry_file_pos` as its archive offset.
    ///
    /// `end` must not be smaller than the end of the previous segment; this is
    /// only checked in debug builds.
    pub fn append_segment(&mut self, kind: EntrySegmentKind, end: u64, entry_file_pos: u64) {
        // Defaults describe the very first segment of an entry.
        let mut start = 0;
        let mut file_off = entry_file_pos;

        if let Some(prev) = self.segments.last() {
            start = prev.end;
            file_off = match prev.kind {
                EntrySegmentKind::Pad => prev.file_off,
                EntrySegmentKind::Data => prev.file_off + (prev.end - prev.start),
            };
        }
        debug_assert!(end >= start);

        self.segments.push(EntrySegment {
            file_off,
            start,
            end,
            kind,
        });
    }
}
80 changes: 79 additions & 1 deletion tests/all.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ extern crate xattr;

use std::fs::{self, File};
use std::io::prelude::*;
use std::io::{self, BufWriter, Cursor};
use std::io::{self, BufWriter, Cursor, SeekFrom};
use std::iter::repeat;
use std::path::{Path, PathBuf};

Expand Down Expand Up @@ -264,6 +264,43 @@ fn reading_entries_with_seek() {
reading_entries_common(ar.entries_with_seek().unwrap());
}

// Exercises `Seek` on entries obtained through `entries_with_seek`, using the
// `reading_files.tar` fixture (two entries, `a` and `b`).
#[test]
fn seeking_entries() {
let rdr = Cursor::new(tar!("reading_files.tar"));
let mut ar = Archive::new(rdr);
let mut entries = ar.entries_with_seek().unwrap();

let mut a = t!(entries.next().unwrap());
assert_eq!(&*a.header().path_bytes(), b"a");
// Seeking to the end reports the entry length (22 bytes for this fixture).
assert_eq!(t!(a.seek(SeekFrom::End(0))), 22);
// Absolute seek, then read the remaining 20 bytes.
assert_eq!(t!(a.seek(SeekFrom::Start(2))), 2);
let mut s = String::new();
t!(a.read_to_string(&mut s));
assert_eq!(s, "a\na\na\na\na\na\na\na\na\na\n");
s.truncate(0);
// 22 - 23 would be a negative position, which must be rejected.
assert!(a.seek(SeekFrom::End(-23)).is_err());
// Relative seek backwards from the end (the cursor is at EOF after the read
// above), then re-read the last 5 bytes.
t!(a.seek(SeekFrom::Current(-5)));
t!(a.read_to_string(&mut s));
assert_eq!(s, "\na\na\n");
// Leave the cursor in the middle of `a` before advancing; presumably this
// checks that iteration still locates the next header afterwards.
t!(a.seek(SeekFrom::End(-10)));

let mut b = t!(entries.next().unwrap());
assert_eq!(&*b.header().path_bytes(), b"b");
s.truncate(0);
t!(b.read_to_string(&mut s));
assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n");
s.truncate(0);
// Rewinding to the start yields the same contents again.
t!(b.seek(SeekFrom::Start(0)));
t!(b.read_to_string(&mut s));
assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n");
s.truncate(0);
// Seeking past the end is clamped to the entry length, and reads from there
// return nothing.
assert_eq!(t!(b.seek(SeekFrom::End(8))), 22);
t!(b.read_to_string(&mut s));
assert_eq!(s, "");

assert!(entries.next().is_none());
}

struct LoggingReader<R> {
inner: R,
read_bytes: u64,
Expand Down Expand Up @@ -1346,6 +1383,47 @@ fn writing_sparse() {
assert!(entries.next().is_none());
}

// Exercises `Seek` on sparse entries from the `sparse.tar` fixture, where the
// entry contents mix stored data with runs of NUL bytes.
#[test]
fn seeking_sparse() {
let rdr = Cursor::new(tar!("sparse.tar"));
let mut ar = Archive::new(rdr);
let mut entries = t!(ar.entries_with_seek());

// The first two entries are only identified, not read.
let a = t!(entries.next().unwrap());
assert_eq!(&*a.header().path_bytes(), b"sparse_begin.txt");

let a = t!(entries.next().unwrap());
assert_eq!(&*a.header().path_bytes(), b"sparse_end.txt");

let mut a = t!(entries.next().unwrap());
let mut s = String::new();
assert_eq!(&*a.header().path_bytes(), b"sparse_ext.txt");
// From 0xa000 to the end: 0x1000 NUL bytes followed by the final "text\n".
t!(a.seek(SeekFrom::Start(0xa000)));
t!(a.read_to_string(&mut s));
assert!(s[..0x1000].chars().all(|x| x == '\u{0}'));
assert_eq!(&s[0x1000..], "text\n");
s.truncate(0);
// Step back 0x2003 bytes from the end (the cursor is at EOF after the read
// above): this lands 3 bytes before the end of an earlier "text\n", followed
// by NUL bytes and then the final "text\n".
t!(a.seek(SeekFrom::Current(-(0x2000 + 3))));
t!(a.read_to_string(&mut s));
assert_eq!(&s[..3], "xt\n");
assert!(s[3..0x2000 - 2].chars().all(|x| x == '\u{0}'));
assert_eq!(&s[0x2000 - 2..], "text\n");
s.truncate(0);
// Already at the end: nothing more to read.
t!(a.read_to_string(&mut s));
assert_eq!(s, "");

let mut a = t!(entries.next().unwrap());
let mut s = String::new();
assert_eq!(&*a.header().path_bytes(), b"sparse.txt");
// Seek into the entry at 0x2fa0; the remainder is "world\n" followed by NUL
// bytes up to a total length of 0x4000.
t!(a.seek(SeekFrom::Start(0x2fa0)));
t!(a.read_to_string(&mut s));
assert_eq!(&s[..6], "world\n");
assert!(s[6..].chars().all(|x| x == '\u{0}'));
assert_eq!(s.len(), 0x4000 - 0x2fa0);

assert!(entries.next().is_none());
}

#[test]
fn path_separators() {
let mut ar = Builder::new(Vec::new());
Expand Down