Skip to content

Commit 42c1389

Browse files
committed
Made the IFD prefetch its tag data before processing; read_tag now operates on an EndianAwareReader and an Arc<dyn AsyncFileReader>.
1 parent 1d0bcd7 commit 42c1389

File tree

2 files changed

+58
-47
lines changed

2 files changed

+58
-47
lines changed

src/ifd.rs

+47-47
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::collections::HashMap;
22
use std::io::Read;
33
use std::ops::Range;
4+
use std::sync::Arc;
45

56
use bytes::{buf::Buf, Bytes};
67
use num_enum::TryFromPrimitive;
@@ -195,26 +196,24 @@ impl ImageFileDirectory {
195196
} else {
196197
cursor.read_u16().await?.into()
197198
};
198-
let mut tags = HashMap::with_capacity(tag_count as usize);
199-
for _ in 0..tag_count {
200-
let (tag_name, tag_value) = read_tag(cursor, bigtiff).await?;
201-
tags.insert(tag_name, tag_value);
202-
}
203-
204199
// Tag 2 bytes
205200
// Type 2 bytes
206201
// Count:
207202
// - bigtiff: 8 bytes
208203
// - else: 4 bytes
209204
// Value:
210-
// - bigtiff: 8 bytes either a pointer the value itself
211-
// - else: 4 bytes either a pointer the value itself
205+
// - bigtiff: 8 bytes either a pointer or the value itself
206+
// - else: 4 bytes either a pointer or the value itself
212207
let ifd_entry_byte_size = if bigtiff { 20 } else { 12 };
213-
// The size of `tag_count` that we read above
214-
let tag_count_byte_size = if bigtiff { 8 } else { 2 };
215208

216-
// Reset the cursor position before reading the next ifd offset
217-
cursor.seek(ifd_start + (ifd_entry_byte_size * tag_count) + tag_count_byte_size);
209+
// read all tag data into an EndianAwareReader
210+
let mut reader = cursor.read(ifd_entry_byte_size * tag_count).await?;
211+
212+
let mut tags = HashMap::with_capacity(tag_count as usize);
213+
for _ in 0..tag_count {
214+
let (tag_name, tag_value) = read_tag(&mut reader, cursor.reader(), bigtiff).await?;
215+
tags.insert(tag_name, tag_value);
216+
}
218217

219218
let next_ifd_offset = if bigtiff {
220219
cursor.read_u64().await?
@@ -838,22 +837,50 @@ impl ImageFileDirectory {
838837
}
839838

840839
/// Read a single tag from the cursor
841-
async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(Tag, Value)> {
840+
async fn read_tag(cursor: &mut EndianAwareReader, file_reader: &Arc<dyn AsyncFileReader>, bigtiff: bool) -> AsyncTiffResult<(Tag, Value)> {
842841
// let start_cursor_position = cursor.position();
843842

844-
let tag_name = Tag::from_u16_exhaustive(cursor.read_u16().await?);
843+
let tag_name = Tag::from_u16_exhaustive(cursor.read_u16()?);
845844

846-
let tag_type_code = cursor.read_u16().await?;
845+
let tag_type_code = cursor.read_u16()?;
847846
let tag_type = Type::from_u16(tag_type_code).expect(
848847
"Unknown tag type {tag_type_code}. TODO: we should skip entries with unknown tag types.",
849848
);
850849
let count = if bigtiff {
851-
cursor.read_u64().await?
850+
cursor.read_u64()?
852851
} else {
853-
cursor.read_u32().await?.into()
852+
cursor.read_u32()?.into()
854853
};
855854

856-
let tag_value = read_tag_value(cursor, tag_type, count, bigtiff).await?;
855+
let tag_size = tag_type.size();
856+
857+
let value_byte_length = count.checked_mul(tag_size).unwrap();
858+
859+
// prefetch all tag data
860+
let mut data = if (bigtiff && value_byte_length <= 8) || value_byte_length <= 4 {
861+
// value fits in offset field
862+
let mut res = vec![0u8; value_byte_length as usize];
863+
cursor.read_exact(&mut res)?;
864+
if bigtiff {
865+
cursor.advance(8-value_byte_length)?;
866+
} else {
867+
cursor.advance(4-value_byte_length)?;
868+
}
869+
EndianAwareReader::new(Bytes::from_owner(res).reader(), cursor.endianness())
870+
} else {
871+
// fetch using file_reader
872+
let offset = if bigtiff {
873+
cursor.read_u64()?
874+
} else {
875+
cursor.read_u32()?.into()
876+
};
877+
let reader = file_reader.get_bytes(offset..offset+value_byte_length).await?.reader();
878+
EndianAwareReader::new(reader, cursor.endianness())
879+
// cursor.seek(offset);
880+
// cursor.read(value_byte_length).await?
881+
};
882+
883+
let tag_value = read_tag_value(&mut data, tag_type, count)?;
857884

858885
// TODO: better handle management of cursor state <- should be done now
859886
// let ifd_entry_size = if bigtiff { 20 } else { 12 };
@@ -867,43 +894,16 @@ async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(T
867894
/// NOTE: this does not maintain cursor state
868895
// This is derived from the upstream tiff crate:
869896
// https://github.com/image-rs/image-tiff/blob/6dc7a266d30291db1e706c8133357931f9e2a053/src/decoder/ifd.rs#L369-L639
870-
async fn read_tag_value(
871-
cursor: &mut AsyncCursor,
897+
fn read_tag_value(
898+
data: &mut EndianAwareReader,
872899
tag_type: Type,
873900
count: u64,
874-
bigtiff: bool,
875901
) -> AsyncTiffResult<Value> {
876902
// Case 1: there are no values so we can return immediately.
877903
if count == 0 {
878904
return Ok(Value::List(vec![]));
879905
}
880906

881-
let tag_size = tag_type.size();
882-
883-
let value_byte_length = count.checked_mul(tag_size).unwrap();
884-
885-
// prefetch all tag data
886-
let mut data = if (bigtiff && value_byte_length <= 8) || value_byte_length <= 4 {
887-
// value fits in offset field
888-
let res = cursor.read(value_byte_length).await?;
889-
if bigtiff {
890-
cursor.advance(8-value_byte_length);
891-
} else {
892-
cursor.advance(4-value_byte_length);
893-
}
894-
res
895-
} else {
896-
// Seek cursor
897-
let offset = if bigtiff {
898-
cursor.read_u64().await?
899-
} else {
900-
cursor.read_u32().await?.into()
901-
};
902-
let reader = cursor.reader().get_bytes(offset..offset+value_byte_length).await?.reader();
903-
EndianAwareReader::new(reader, cursor.endianness())
904-
// cursor.seek(offset);
905-
// cursor.read(value_byte_length).await?
906-
};
907907
// Case 2: there is one value.
908908
if count == 1 {
909909
return Ok(match tag_type {

src/reader.rs

+11
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,17 @@ impl EndianAwareReader {
352352
/// Build an `EndianAwareReader` from an in-memory `Reader<Bytes>` and the byte order to
/// associate with it. Both arguments are stored as-is; nothing is read from `reader` here.
pub(crate) fn new(reader: Reader<Bytes>, endianness: Endianness) -> Self {
353353
Self { reader, endianness }
354354
}
355+
356+
/// Return the byte order this reader was constructed with, so callers can create further
/// readers that use the same endianness.
pub(crate) fn endianness(&self) -> Endianness {
357+
self.endianness
358+
}
359+
360+
/// Skip over up to `amt` bytes of the underlying reader, discarding them.
///
/// The bytes are drained by copying a `take(amt)`-limited view of the reader into
/// `std::io::sink()`. Returns the number of bytes actually consumed; this is smaller than
/// `amt` when the reader runs out of data first, which is NOT reported as an error.
///
/// # Errors
///
/// Propagates any I/O error raised by the copy.
pub(crate) fn advance(&mut self, amt: u64) -> AsyncTiffResult<u64> {
361+
// TODO: can we use consume?
362+
// from https://stackoverflow.com/a/42247224
363+
Ok(std::io::copy(&mut self.reader.by_ref().take(amt), &mut std::io::sink())?)
364+
}
365+
355366
/// Read a u8 from the cursor, advancing the internal state by 1 byte.
356367
pub(crate) fn read_u8(&mut self) -> AsyncTiffResult<u8> {
357368
Ok(self.reader.read_u8()?)

0 commit comments

Comments
 (0)