-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess-sample.py
More file actions
129 lines (83 loc) · 3.53 KB
/
process-sample.py
File metadata and controls
129 lines (83 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# process-sample.py
# extract reads and make mitochondrial calls
import callmito
import os
import sys
import argparse
import time
# SETUP
parser = argparse.ArgumentParser(description='process-sample.py')
parser.add_argument('--ref', type=str,help='genome fasta with dictionary and .fai',required=True)
parser.add_argument('--finaldir', type=str,help='final dir for output',required=True)
parser.add_argument('--name', type=str,help='name of sample to process',required=True)
parser.add_argument('--cram', type=str,help='aligned cram file',required=True)
parser.add_argument('--coords',type=str,help='coordinates to extract, numts + chrM',required=True)
parser.add_argument('--mitoFa',type=str,help='mito fasta with index',required=True)
parser.add_argument('--mitoFaRotated',type=str,help='rotated mito fasta with index',required=True)
parser.add_argument('--chainfile',type=str,help='liftover chain fail to convert rotated to original',required=True)
parser.add_argument('--diagnosticTable',type=str,help='table of diagnostic SNPs',required=True)
args = parser.parse_args()
#####################################################################
myData = {} # dictionary for keeping and passing information
myData['finalDir'] = args.finaldir
myData['ref'] = args.ref
myData['sampleName'] = args.name
myData['cramFileName'] = args.cram
myData['coordsFileName'] = args.coords
myData['mitoFa'] = args.mitoFa
myData['mitoFaRotated'] = args.mitoFaRotated
myData['diagnosticTable'] = args.diagnosticTable
myData['chainFile'] = args.chainfile
# get sequence len
inFile = open(myData['mitoFa'] + '.fai','r')
line = inFile.readline()
line = line.rstrip()
line = line.split()
l = int(line[1])
myData['mitoLen'] = l
inFile.close()
myData['roteTake'] = 4000 # take 4000 first and last from the rotated
myData['minAlleleFreq'] = 0.5 # require >= 50% read support
# max coverage for downsampling
myData['maxCoverage'] = 5000
# check that have interval list file
myData['mitoFaIntervalList'] = myData['mitoFa'].replace('.fa','.interval_list')
myData['mitoFaRotatedIntervalList'] = myData['mitoFaRotated'].replace('.fa','.interval_list')
if os.path.isfile(myData['mitoFaIntervalList']) is False:
print('ERROR! %s not found, please make interval list' % myData['mitoFaIntervalList'])
sys.exit()
if os.path.isfile(myData['mitoFaRotatedIntervalList']) is False:
print('ERROR! %s not found, please make interval list' % myData['mitoFaIntervalList'])
sys.exit()
if myData['finalDir'][-1] != '/':
myData['finalDir'] += '/'
if os.path.isdir(myData['finalDir']) is False:
print('Error! output dir %s not does not exist' % myData['finalDir'])
sys.exit()
# setup the output dir
myData['finalDirSample'] = myData['finalDir'] + myData['sampleName']
if os.path.isdir(myData['finalDirSample']) is False:
print('making ',myData['finalDirSample'])
cmd = 'mkdir ' + myData['finalDirSample']
print(cmd)
callmito.runCMD(cmd)
myData['finalDirSample'] += '/'
myData['logFileName'] = myData['finalDirSample'] + myData['sampleName'] + '.mito.log'
myData['logFile'] = open(myData['logFileName'],'w')
# add initial infoto log
callmito.init_log(myData)
callmito.check_prog_paths(myData)
# get reads to extract
callmito.extract_reads(myData)
# align to each mito
callmito.align_to_mitos(myData)
callmito.run_coverage(myData)
callmito.down_sample(myData)
# run vcf
callmito.call_vars(myData)
# filter vcf
callmito.filter_germline(myData)
# make fasta and mask
callmito.make_fasta_germline(myData)
callmito.assign_haplogroup(myData)
myData['logFile'].close()