@@ -3,6 +3,7 @@ use crate::constants::{FOOTER_LINES, HEADER_LINES};
3
3
use std:: cmp:: { max, min} ;
4
4
use std:: convert:: TryInto ;
5
5
use std:: io:: BufRead ;
6
+ use std:: num:: { NonZeroU8 , NonZeroUsize } ;
6
7
use std:: ops:: RangeInclusive ;
7
8
8
9
use anyhow:: { anyhow, Result } ;
@@ -103,7 +104,7 @@ fn verify_alphabet(entries: &[Entry], must_aa: bool) -> Result<bool> {
103
104
// It's an asterisk, or else it's both at least ONLY_AA_OFFSET (meaning
104
105
// at least the value of the first only-AA character), and also found in the ONLY_AA bitmap.
105
106
must_be_aa |= ( byte == b'*' )
106
- | ( ( ONLY_AA_OFFSET ..=b'z' ) . contains ( & byte) &
107
+ | ( ( ONLY_AA_OFFSET ..=b'z' ) . contains ( & byte)
107
108
& ( ( ONLY_AA . wrapping_shr ( byte. wrapping_sub ( ONLY_AA_OFFSET ) . into ( ) ) & 1 ) == 1 ) ) ;
108
109
}
109
110
}
@@ -114,9 +115,10 @@ fn calculate_consensus<'a, T: Iterator<Item = &'a Vec<u8>>>(
114
115
seqs : T ,
115
116
ncols : usize ,
116
117
is_aa : bool ,
117
- ) -> Vec < Option < u8 > > {
118
+ ) -> Vec < Option < NonZeroU8 > > {
118
119
// We have verified seq is between ASCII 33 and 126, inclusive.
119
120
let offset = b'!' ;
121
+ let nonzero_offset = NonZeroU8 :: new ( offset) . unwrap ( ) ;
120
122
let mut counts = vec ! [ [ 0u32 ; 126 - 33 + 1 ] ; ncols] ;
121
123
122
124
// First loop over sequences in memory order
@@ -149,17 +151,12 @@ fn calculate_consensus<'a, T: Iterator<Item = &'a Vec<u8>>>(
149
151
counts
150
152
. iter ( )
151
153
. map ( |arr| {
152
- let ( most_common_byte, count) = arr
153
- . iter ( )
154
- . enumerate ( )
155
- . max_by_key ( |( _, & x) | x)
156
- . map ( |( i, cnt) | ( i as u8 + offset, cnt) )
157
- . unwrap ( ) ;
154
+ let ( index, count) = arr. iter ( ) . enumerate ( ) . max_by_key ( |( _, & x) | x) . unwrap ( ) ;
158
155
159
156
if * count == 0 {
160
157
None
161
158
} else {
162
- Some ( most_common_byte )
159
+ Some ( nonzero_offset . saturating_add ( index as u8 ) )
163
160
}
164
161
} )
165
162
. collect ( )
@@ -215,7 +212,7 @@ pub struct Alignment {
215
212
// computed from the graphemes field easily
216
213
longest_name : usize ,
217
214
// we calculate this lazily upon demand
218
- consensus : Option < Vec < Option < u8 > > > ,
215
+ consensus : Option < Vec < Option < NonZeroU8 > > > ,
219
216
// When sorting entries by |ent| order[ent.original_index], the rows are ordered.
220
217
// also calculate this lazily
221
218
order : Option < Vec < u32 > > ,
@@ -233,7 +230,7 @@ impl Alignment {
233
230
234
231
fn new < T : BufRead > ( file : T , uppercase : bool , must_aa : bool ) -> Result < Alignment > {
235
232
let reader = fasta:: Reader :: new ( file) ;
236
- let mut seqlength: Option < usize > = None ;
233
+ let mut seqlength: Option < NonZeroUsize > = None ;
237
234
let mut entries = Vec :: new ( ) ;
238
235
239
236
for ( original_index, result) in reader. records ( ) . enumerate ( ) {
@@ -243,20 +240,27 @@ impl Alignment {
243
240
let record = result?;
244
241
let graphemes = Graphemes :: new ( record. id ( ) ) ;
245
242
let seq = record. seq ( ) . to_vec ( ) ;
243
+ let this_seq_len = match NonZeroUsize :: new ( seq. len ( ) ) {
244
+ None => {
245
+ return Err ( anyhow ! (
246
+ "Sequence \" {}\" has length zero, which is not allowed." ,
247
+ graphemes. string
248
+ ) )
249
+ }
250
+ Some ( len) => len,
251
+ } ;
246
252
247
253
// Check identical sequence lengths
248
254
if let Some ( len) = seqlength {
249
- if seq . len ( ) != len {
255
+ if len != this_seq_len {
250
256
return Err ( anyhow ! (
251
- "Not all input sequences are the same length. \
252
- Expected sequence length {}, seq \" {}\" has length {}.",
253
- len,
254
- & graphemes. string,
255
- seq. len( )
257
+ "Sequence \" {}\" has a different length than the previous sequence. \
258
+ In an alignment, all sequences must have the same length.",
259
+ graphemes. string
256
260
) ) ;
257
261
}
258
262
} else {
259
- seqlength = Some ( seq . len ( ) )
263
+ seqlength = Some ( this_seq_len )
260
264
}
261
265
262
266
// start..stop is span of non-deleted symbols
@@ -288,10 +292,6 @@ impl Alignment {
288
292
// Verify alphabet
289
293
let is_aa = verify_alphabet ( & entries, must_aa) ?;
290
294
291
- if seqlength. map_or ( true , |i| i < 1 ) {
292
- return Err ( anyhow ! ( "Alignment has no seqs, or seqs have length 0." ) ) ;
293
- }
294
-
295
295
let longest_name = entries. iter ( ) . map ( |v| v. graphemes . len ( ) ) . max ( ) . unwrap ( ) ;
296
296
Ok ( Alignment {
297
297
entries,
@@ -464,7 +464,7 @@ impl View {
464
464
self . aln . entries . get ( n) . map ( |x| & x. seq )
465
465
}
466
466
467
- pub fn consensus ( & self ) -> Option < & Vec < Option < u8 > > > {
467
+ pub fn consensus ( & self ) -> Option < & Vec < Option < NonZeroU8 > > > {
468
468
self . aln . consensus . as_ref ( )
469
469
}
470
470
0 commit comments