Skip to content

Commit 237627b

Browse files
committed
Use NonZero types for efficiency
1 parent 022aaea commit 237627b

File tree

2 files changed

+39
-37
lines changed

2 files changed

+39
-37
lines changed

src/data.rs

+23-23
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ use crate::constants::{FOOTER_LINES, HEADER_LINES};
33
use std::cmp::{max, min};
44
use std::convert::TryInto;
55
use std::io::BufRead;
6+
use std::num::{NonZeroU8, NonZeroUsize};
67
use std::ops::RangeInclusive;
78

89
use anyhow::{anyhow, Result};
@@ -103,7 +104,7 @@ fn verify_alphabet(entries: &[Entry], must_aa: bool) -> Result<bool> {
103104
// It's asterisk, or else it's both at least ONLY_AA_OFFSET (meaning)
104105
// at least the value of the first only-AA character, and also found in the ONLY_AA bitmap.
105106
must_be_aa |= (byte == b'*')
106-
| ((ONLY_AA_OFFSET..=b'z').contains(&byte) &
107+
| ((ONLY_AA_OFFSET..=b'z').contains(&byte)
107108
& ((ONLY_AA.wrapping_shr(byte.wrapping_sub(ONLY_AA_OFFSET).into()) & 1) == 1));
108109
}
109110
}
@@ -114,9 +115,10 @@ fn calculate_consensus<'a, T: Iterator<Item = &'a Vec<u8>>>(
114115
seqs: T,
115116
ncols: usize,
116117
is_aa: bool,
117-
) -> Vec<Option<u8>> {
118+
) -> Vec<Option<NonZeroU8>> {
118119
// We have verified seq is between ASCII 33 and 126, inclusive.
119120
let offset = b'!';
121+
let nonzero_offset = NonZeroU8::new(offset).unwrap();
120122
let mut counts = vec![[0u32; 126 - 33 + 1]; ncols];
121123

122124
// First loop over sequences in memory order
@@ -149,17 +151,12 @@ fn calculate_consensus<'a, T: Iterator<Item = &'a Vec<u8>>>(
149151
counts
150152
.iter()
151153
.map(|arr| {
152-
let (most_common_byte, count) = arr
153-
.iter()
154-
.enumerate()
155-
.max_by_key(|(_, &x)| x)
156-
.map(|(i, cnt)| (i as u8 + offset, cnt))
157-
.unwrap();
154+
let (index, count) = arr.iter().enumerate().max_by_key(|(_, &x)| x).unwrap();
158155

159156
if *count == 0 {
160157
None
161158
} else {
162-
Some(most_common_byte)
159+
Some(nonzero_offset.saturating_add(index as u8))
163160
}
164161
})
165162
.collect()
@@ -215,7 +212,7 @@ pub struct Alignment {
215212
// computed from the graphemes field easily
216213
longest_name: usize,
217214
// we calculate this lazily upon demand
218-
consensus: Option<Vec<Option<u8>>>,
215+
consensus: Option<Vec<Option<NonZeroU8>>>,
219216
// When sorting entries by |ent| order[ent.original_index], the rows are ordered.
220217
// also calculate this lazily
221218
order: Option<Vec<u32>>,
@@ -233,7 +230,7 @@ impl Alignment {
233230

234231
fn new<T: BufRead>(file: T, uppercase: bool, must_aa: bool) -> Result<Alignment> {
235232
let reader = fasta::Reader::new(file);
236-
let mut seqlength: Option<usize> = None;
233+
let mut seqlength: Option<NonZeroUsize> = None;
237234
let mut entries = Vec::new();
238235

239236
for (original_index, result) in reader.records().enumerate() {
@@ -243,20 +240,27 @@ impl Alignment {
243240
let record = result?;
244241
let graphemes = Graphemes::new(record.id());
245242
let seq = record.seq().to_vec();
243+
let this_seq_len = match NonZeroUsize::new(seq.len()) {
244+
None => {
245+
return Err(anyhow!(
246+
"Sequence \"{}\" has length zero, which is not allowed.",
247+
graphemes.string
248+
))
249+
}
250+
Some(len) => len,
251+
};
246252

247253
// Check identical sequence lengths
248254
if let Some(len) = seqlength {
249-
if seq.len() != len {
255+
if len != this_seq_len {
250256
return Err(anyhow!(
251-
"Not all input sequences are the same length. \
252-
Expected sequence length {}, seq \"{}\" has length {}.",
253-
len,
254-
&graphemes.string,
255-
seq.len()
257+
"Sequence \"{}\" has a different length than the previous sequence. \
258+
In an alignment, all sequences must have the same length.",
259+
graphemes.string
256260
));
257261
}
258262
} else {
259-
seqlength = Some(seq.len())
263+
seqlength = Some(this_seq_len)
260264
}
261265

262266
// start..stop is span of non-deleted symbols
@@ -288,10 +292,6 @@ impl Alignment {
288292
// Verify alphabet
289293
let is_aa = verify_alphabet(&entries, must_aa)?;
290294

291-
if seqlength.map_or(true, |i| i < 1) {
292-
return Err(anyhow!("Alignment has no seqs, or seqs have length 0."));
293-
}
294-
295295
let longest_name = entries.iter().map(|v| v.graphemes.len()).max().unwrap();
296296
Ok(Alignment {
297297
entries,
@@ -464,7 +464,7 @@ impl View {
464464
self.aln.entries.get(n).map(|x| &x.seq)
465465
}
466466

467-
pub fn consensus(&self) -> Option<&Vec<Option<u8>>> {
467+
pub fn consensus(&self) -> Option<&Vec<Option<NonZeroU8>>> {
468468
self.aln.consensus.as_ref()
469469
}
470470

src/main.rs

+16-14
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ use data::{Graphemes, View};
1414
use std::cmp::min;
1515
use std::ffi::OsString;
1616
use std::io::{stdout, BufReader, Write};
17+
use std::num::NonZeroU8;
1718
use std::path::Path;
1819

1920
use anyhow::Result;
@@ -350,17 +351,17 @@ fn draw_top_consensus<T: Write>(
350351
io: &mut TerminalIO<T>,
351352
colstart: u16,
352353
is_aa: bool,
353-
seq: &[Option<u8>],
354+
seq: &[Option<NonZeroU8>],
354355
) -> Result<()> {
355356
queue!(io.io, cursor::MoveTo(colstart, HEADER_LINES as u16),)?;
356357
for maybe_base in seq {
357358
let (background_color, symbol) = if let Some(byte) = maybe_base {
358359
let bc = if is_aa {
359-
get_color_background_aa(*byte)
360+
get_color_background_aa(byte.get())
360361
} else {
361-
get_color_background_dna(*byte)
362+
get_color_background_dna(byte.get())
362363
};
363-
(bc, *byte as char)
364+
(bc, byte.get() as char)
364365
} else {
365366
(None, ' ')
366367
};
@@ -376,21 +377,22 @@ fn draw_consensus_other_seq<T: Write>(
376377
termrow: u16,
377378
is_aa: bool,
378379
seq: &[u8],
379-
cons: &[Option<u8>],
380+
cons: &[Option<NonZeroU8>],
380381
) -> Result<()> {
381382
queue!(io.io, cursor::MoveTo(colstart, termrow))?;
382383
for (byte, maybe_cons) in seq.iter().zip(cons.iter()) {
383-
let (color, symbol) =
384-
if maybe_cons.is_some() && maybe_cons.unwrap() & 0b11011111 == byte & 0b11011111 {
385-
(None, ' ')
384+
let (color, symbol) = if maybe_cons.is_some()
385+
&& maybe_cons.unwrap().get() & 0b11011111 == byte & 0b11011111
386+
{
387+
(None, ' ')
388+
} else {
389+
let color = if is_aa {
390+
get_color_background_aa(*byte)
386391
} else {
387-
let color = if is_aa {
388-
get_color_background_aa(*byte)
389-
} else {
390-
get_color_background_dna(*byte)
391-
};
392-
(color, *byte as char)
392+
get_color_background_dna(*byte)
393393
};
394+
(color, *byte as char)
395+
};
394396
set_terminal_color(io, color)?;
395397
queue!(io.io, Print(symbol))?;
396398
}

0 commit comments

Comments
 (0)