@@ -178,11 +178,11 @@ def vcf2tsv(vcf_dir):
178
178
os .remove (filename )
179
179
#concatenate the query results for all donors
180
180
for fp in glob .glob (os .path .join (vcf_dir , "*.query.txt" ), recursive = True ):
181
- projectId , donorId = os .path .basename (fp ).split ("." )[0 :2 ]
181
+ projectId , donorId , sampleId , experiment = os .path .basename (fp ).split ("." )[0 :5 ]
182
182
evtype = os .path .basename (fp ).split ("." )[- 3 ]
183
183
cat = f'cat { fp } '
184
184
awk = f'awk \' {{printf "\\ t%s\\ t%d\\ t%s\\ t%s\\ t%s\\ t%s\\ t%s\\ n\" ,$1,$2,$3,$4,$5,$6,$7}}\' '
185
- sed = f'sed "s/^/{ donorId } /g" >> { vcf_dir } .{ evtype } .all'
185
+ sed = f'sed "s/^/{ projectId } \t { donorId } \t { sampleId } \t { experiment } /g" >> { vcf_dir } .{ evtype } .all'
186
186
cmd = '|' .join ([cat , awk , sed ])
187
187
run_cmd (cmd )
188
188
@@ -246,7 +246,7 @@ def union_vcf(data_dir, union_dir):
246
246
donor = set ()
247
247
248
248
for fn in glob .glob (os .path .join (data_dir , "*_annot_vcf" , "*-*" , "*.query.txt" ), recursive = True ):
249
- donor .add (os .path .basename (fn ).split ("2020" )[0 ].rstrip ('.' ))
249
+ donor .add (os .path .basename (fn ).split (". 2020" )[0 ].rstrip ('.' ))
250
250
251
251
for evtype in ['snv' , 'indel' ]:
252
252
for do in donor :
@@ -363,7 +363,9 @@ def snv_readcount_annot(union_dir, validated_dir, readcount_dir):
363
363
os .makedirs (validated_dir )
364
364
365
365
for fn in glob .glob (os .path .join (union_dir , "*.snv.vcf" ), recursive = True ):
366
- projectId , donorId , sampleId , library_strategy , evtype , fileformat = os .path .basename (fn ).split ("." )
366
+ projectId , donorId , sampleId , library_strategy = os .path .basename (fp ).split ("." )[0 :5 ]
367
+ evtype , fileformat = os .path .basename (fp ).split ("." )[- 2 :]
368
+
367
369
output_vcf = os .path .join (validated_dir , '.' .join ([projectId , donorId , 'validated' , evtype , fileformat ]))
368
370
normal_rc = glob .glob (os .path .join (readcount_dir , '.' .join ([projectId , donorId , sample [donorId ]['normal' ], 'targeted-seq' , '*' , 'aln.bam.rc' ])))[0 ]
369
371
tumour_rc = glob .glob (os .path .join (readcount_dir , '.' .join ([projectId , donorId , sample [donorId ]['tumour' ], 'targeted-seq' , '*' , 'aln.bam.rc' ])))[0 ]
0 commit comments