|
| 1 | +use std::collections::HashMap; |
| 2 | + |
| 3 | +use anyhow::bail; |
| 4 | + |
| 5 | +use crate::align; |
| 6 | +use crate::alignment::{Alignment, InternalMapping, Mapping, PairwiseAlignment, Sequences}; |
| 7 | +use crate::tree::{NodeIdx, NodeIdx::Internal as Int, NodeIdx::Leaf, Tree}; |
| 8 | +use crate::Result; |
| 9 | + |
| 10 | +pub struct AlignmentBuilder<'a> { |
| 11 | + tree: &'a Tree, |
| 12 | + seqs: Sequences, |
| 13 | + node_map: InternalMapping, |
| 14 | +} |
| 15 | + |
| 16 | +impl<'a> AlignmentBuilder<'a> { |
| 17 | + pub fn new(tree: &'a Tree, seqs: Sequences) -> AlignmentBuilder<'a> { |
| 18 | + AlignmentBuilder { |
| 19 | + tree, |
| 20 | + seqs, |
| 21 | + node_map: InternalMapping::new(), |
| 22 | + } |
| 23 | + } |
| 24 | + |
| 25 | + pub fn msa(mut self, msa: InternalMapping) -> Self { |
| 26 | + self.node_map = msa; |
| 27 | + self |
| 28 | + } |
| 29 | + |
| 30 | + pub fn build(self) -> Result<Alignment> { |
| 31 | + if self.node_map.is_empty() { |
| 32 | + if self.seqs.aligned { |
| 33 | + self.build_from_seqs() |
| 34 | + } else { |
| 35 | + self.build_from_unaligned() |
| 36 | + } |
| 37 | + } else { |
| 38 | + self.build_from_map() |
| 39 | + } |
| 40 | + } |
| 41 | + |
| 42 | + /// This assumes that the tree structure matches the alignment structure and that the sequences are aligned. |
| 43 | + fn build_from_seqs(self) -> Result<Alignment> { |
| 44 | + let msa_len = self.seqs.msa_len(); |
| 45 | + let mut stack = HashMap::<NodeIdx, Mapping>::with_capacity(self.tree.len()); |
| 46 | + let mut msa = InternalMapping::with_capacity(self.tree.n); |
| 47 | + for node in self.tree.postorder.iter() { |
| 48 | + match node { |
| 49 | + Int(_) => { |
| 50 | + let childs = self.tree.children(node); |
| 51 | + let map_x = stack[&childs[0]].clone(); |
| 52 | + let map_y = stack[&childs[1]].clone(); |
| 53 | + stack.insert(*node, Self::stack_maps(msa_len, &map_x, &map_y)); |
| 54 | + msa.insert(*node, Self::clear_common_gaps(msa_len, &map_x, &map_y)); |
| 55 | + } |
| 56 | + Leaf(_) => { |
| 57 | + let seq = self.seqs.get_by_id(self.tree.node_id(node)).seq(); |
| 58 | + stack.insert(*node, align!(seq).clone()); |
| 59 | + } |
| 60 | + } |
| 61 | + } |
| 62 | + let leaf_maps = stack |
| 63 | + .iter() |
| 64 | + .filter_map(|(idx, map)| match idx { |
| 65 | + Leaf(_) => Some((*idx, map.clone())), |
| 66 | + _ => None, |
| 67 | + }) |
| 68 | + .collect(); |
| 69 | + Ok(Alignment { |
| 70 | + seqs: self.seqs.without_gaps(), |
| 71 | + leaf_map: leaf_maps, |
| 72 | + node_map: msa, |
| 73 | + }) |
| 74 | + } |
| 75 | + |
| 76 | + fn build_from_unaligned(self) -> Result<Alignment> { |
| 77 | + // TODO: use parsimony to align the sequences. |
| 78 | + bail!("Unaligned sequences are not yet supported.") |
| 79 | + } |
| 80 | + |
| 81 | + /// This assumes that the tree structure matches the alignment structure. |
| 82 | + fn build_from_map(self) -> Result<Alignment> { |
| 83 | + let mut alignment = Alignment { |
| 84 | + seqs: Sequences::new(Vec::new()), |
| 85 | + leaf_map: HashMap::new(), |
| 86 | + node_map: self.node_map, |
| 87 | + }; |
| 88 | + let leaf_map = alignment.compile_leaf_map(self.tree.root, self.tree)?; |
| 89 | + alignment.leaf_map = leaf_map; |
| 90 | + alignment.seqs = self.seqs.without_gaps(); |
| 91 | + Ok(alignment) |
| 92 | + } |
| 93 | + |
| 94 | + fn stack_maps(msa_len: usize, map_x: &Mapping, map_y: &Mapping) -> Mapping { |
| 95 | + let mut map = Vec::with_capacity(msa_len); |
| 96 | + let mut ind: usize = 0; |
| 97 | + for (x, y) in map_x.iter().zip(map_y.iter()) { |
| 98 | + if x.is_none() && y.is_none() { |
| 99 | + map.push(None); |
| 100 | + } else { |
| 101 | + map.push(Some(ind)); |
| 102 | + ind += 1; |
| 103 | + } |
| 104 | + } |
| 105 | + map |
| 106 | + } |
| 107 | + |
| 108 | + fn clear_common_gaps(msa_len: usize, map_x: &Mapping, map_y: &Mapping) -> PairwiseAlignment { |
| 109 | + let mut upd_map_x = Vec::with_capacity(msa_len); |
| 110 | + let mut upd_map_y = Vec::with_capacity(msa_len); |
| 111 | + for (x, y) in map_x.iter().zip(map_y.iter()) { |
| 112 | + if x.is_none() && y.is_none() { |
| 113 | + continue; |
| 114 | + } else { |
| 115 | + upd_map_x.push(*x); |
| 116 | + upd_map_y.push(*y); |
| 117 | + } |
| 118 | + } |
| 119 | + PairwiseAlignment::new(upd_map_x, upd_map_y) |
| 120 | + } |
| 121 | +} |
0 commit comments