6
6
## Imports
7
7
##########################################################################
8
8
9
+ from brisera .lv import *
9
10
from brisera .utils import *
10
11
from brisera .records import *
11
12
from brisera .config import settings
@@ -91,9 +92,9 @@ def map_reference(self, arg):
91
92
if self .redundancy > 1 and repseed (sequence , start , self .seed_len ):
92
93
for rdx in xrange (self .redundancy ):
93
94
r = rdx % self .redundancy
94
- yield (seed , (key , offset , is_last , leftstart , leftlen , rightstart , rightlen , r ))
95
+ yield (seed , (key , offset , is_last , sequence [ leftstart : leftlen ], sequence [ rightstart : rightlen ] , r ))
95
96
else :
96
- yield (seed , (key , offset , is_last , leftstart , leftlen , rightstart , rightlen , 0 ))
97
+ yield (seed , (key , offset , is_last , sequence [ leftstart : leftlen ], sequence [ rightstart : rightlen ] , 0 ))
97
98
98
99
def map_queries (self , arg ):
99
100
"""
@@ -135,9 +136,43 @@ def map_queries(self, arg):
135
136
136
137
if self .redundancy > 1 and repseed (sequence , idx , self .seed_len ):
137
138
r = key % self .redundancy
138
- yield (seed , (key , idx , is_last , 0 , idx , rightstart , rightlen , r ))
139
+ yield (seed , (key , idx , is_last , sequence [ 0 : idx ], sequence [ rightstart : rightlen ] , r ))
139
140
else :
140
- yield (seed , (key , idx , is_last , 0 , idx , rightstart , rightlen , 0 ))
141
+ yield (seed , (key , idx , is_last , sequence [0 :idx ], sequence [rightstart :rightlen ], 0 ))
142
+
143
+ def extend (self , arg ):
144
+ """
145
+ Performs the extend portion of the BLAST algorithm.
146
+ """
147
+ key , (query , reference ) = arg
148
+ refstart = reference [1 ]
149
+ refend = refstart + self .seed_len
150
+ differences = 0
151
+
152
+ try :
153
+ # Align left flanks
154
+ if len (query [3 ]) != 0 :
155
+ a = LandauVishkin ().kdifference (reference [3 ], query [3 ], self .k )
156
+ if a [0 ] == - 1 :
157
+ return NO_ALIGNMENT
158
+ if not is_bazea_yates_seed (a , len (query [3 ]), self .seed_len ):
159
+ return NO_ALIGNMENT
160
+
161
+ refstart -= a [0 ]
162
+ differences = a [1 ]
163
+
164
+ # Align right flanks
165
+ if len (query [4 ]) != 0 :
166
+ b = LandauVishkin .kdifference (reference [4 ], query [4 ], self .k - differences )
167
+ if a [0 ] == - 1 :
168
+ return NO_ALIGNMENT
169
+
170
+ refend += b [0 ]
171
+ differences += b [1 ]
172
+
173
+ return reference [0 ], refstart , refend , differences
174
+ except :
175
+ return NO_ALIGNMENT
141
176
142
177
##########################################################################
143
178
## Spark Functionality
@@ -148,13 +183,21 @@ def align_all(sc, refpath, qrypath):
148
183
"""
149
184
Returns an RDD of alignments (no writes to disk)
150
185
"""
151
- reference = sc .sequenceFile (refpath )
152
- queries = sc .sequenceFile (qrypath )
153
- alignment = MerAlignment ()
186
+ reference = sc .sequenceFile (refpath )
187
+ queries = sc .sequenceFile (qrypath )
188
+ alignment = MerAlignment ()
154
189
155
190
# Perform mapping
156
- reference = reference .flatMap (alignment .map_reference )
157
- return reference
191
+ reference = reference .flatMap (alignment .map_reference )
192
+ queries = queries .flatMap (alignment .map_queries )
193
+
194
+ # Find shared seeds
195
+ shared = queries .join (reference )
196
+
197
+ # Compute alignments with Landau-Viskin
198
+ alignments = shared .map (alignment .extend ).filter (lambda a : a != NO_ALIGNMENT )
199
+
200
+ return alignments
158
201
159
202
if __name__ == '__main__' :
160
203
from brisera .convert import *
0 commit comments