Skip to content

Commit 763e142

Browse files
committed
some updates
1 parent bca9b2a commit 763e142

7 files changed

+100
-86
lines changed

get-song-dump.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def download(song_url, file_dump):
1414
with open(file_dump, 'w') as fp:
1515
for study in studies:
1616
if study in ['TEST-CA']: continue
17-
analyses = requests.get(song_url + ('/studies/%s/analysis?analysisStates=PUBLISHED' % (study))).json()
17+
analyses = requests.get(song_url + ('/studies/%s/analysis?analysisStates=PUBLISHED,UNPUBLISHED' % (study))).json()
1818
for analysis in analyses:
1919
fp.write(json.dumps(analysis)+"\n")
2020

get-stats.py

+76-68
Large diffs are not rendered by default.

notebooks/compare_pcawg_sanger_qc.ipynb

+8-8
Large diffs are not rendered by default.

quickstat

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ then
1717
fi
1818

1919
echo "Total $TYPE calls : " $( cat ${VCFDIR}/*.${TYPE}.vcf | grep -cv "^#" )
20+
echo "Total $TYPE calls(no LOWDEPTH): " $( cat ${VCFDIR}/*.${TYPE}.vcf | grep -v "LOWDEPTH" | grep -cv "^#" )
2021
echo "Total validated $TYPE calls: " $( cat ${VCFDIR}/*.${TYPE}.vcf | grep -c "PASS" )
2122
echo ""
2223
for caller in ${callers}

song-suppress.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from argparse import ArgumentParser
88
import time
99
import sys
10-
10+
import csv
11+
import ast
1112

1213
def song_operation(endpoint, operation, token, data=None):
1314

@@ -46,9 +47,11 @@ def main():
4647

4748
if args.report:
4849
with open(args.report, 'r') as fp:
49-
for fline in fp:
50-
run = json.loads(fline)
51-
for a in run.get('run_output_analysis_to_suppress'):
50+
reader = csv.DictReader(fp, delimiter='\t')
51+
for run in reader:
52+
alist = ast.literal_eval(run.get('run_output_analysis_to_suppress'))
53+
for a in alist:
54+
print(a)
5255
endpoint = "%s/studies/%s/analysis/suppress/%s" % (args.song_url, run.get('studyId'), a)
5356
operation = 'analysis_suppress'
5457
song_operation(endpoint, operation, args.token)

utils.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -178,11 +178,11 @@ def vcf2tsv(vcf_dir):
178178
os.remove(filename)
179179
#concatenate the query results for all donors
180180
for fp in glob.glob(os.path.join(vcf_dir, "*.query.txt"), recursive=True):
181-
projectId, donorId = os.path.basename(fp).split(".")[0:2]
181+
projectId, donorId, sampleId, experiment = os.path.basename(fp).split(".")[0:4]
182182
evtype = os.path.basename(fp).split(".")[-3]
183183
cat = f'cat {fp}'
184184
awk = f'awk \'{{printf "\\t%s\\t%d\\t%s\\t%s\\t%s\\t%s\\t%s\\n\",$1,$2,$3,$4,$5,$6,$7}}\''
185-
sed = f'sed "s/^/{donorId}/g" >> {vcf_dir}.{evtype}.all'
185+
sed = f'sed "s/^/{projectId}\t{donorId}\t{sampleId}\t{experiment}/g" >> {vcf_dir}.{evtype}.all'
186186
cmd = '|'.join([cat, awk, sed])
187187
run_cmd(cmd)
188188

@@ -246,7 +246,7 @@ def union_vcf(data_dir, union_dir):
246246
donor = set()
247247

248248
for fn in glob.glob(os.path.join(data_dir, "*_annot_vcf", "*-*", "*.query.txt"), recursive=True):
249-
donor.add(os.path.basename(fn).split("2020")[0].rstrip('.'))
249+
donor.add(os.path.basename(fn).split(".2020")[0].rstrip('.'))
250250

251251
for evtype in ['snv', 'indel']:
252252
for do in donor:
@@ -363,7 +363,9 @@ def snv_readcount_annot(union_dir, validated_dir, readcount_dir):
363363
os.makedirs(validated_dir)
364364

365365
for fn in glob.glob(os.path.join(union_dir, "*.snv.vcf"), recursive=True):
366-
projectId, donorId, sampleId, library_strategy, evtype, fileformat = os.path.basename(fn).split(".")
366+
projectId, donorId, sampleId, library_strategy = os.path.basename(fn).split(".")[0:4]
367+
evtype, fileformat = os.path.basename(fn).split(".")[-2:]
368+
367369
output_vcf = os.path.join(validated_dir, '.'.join([projectId, donorId, 'validated', evtype, fileformat]))
368370
normal_rc = glob.glob(os.path.join(readcount_dir, '.'.join([projectId, donorId, sample[donorId]['normal'], 'targeted-seq', '*', 'aln.bam.rc'])))[0]
369371
tumour_rc = glob.glob(os.path.join(readcount_dir, '.'.join([projectId, donorId, sample[donorId]['tumour'], 'targeted-seq', '*', 'aln.bam.rc'])))[0]

vcf_compare.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def main():
6363

6464

6565
#download data and annotate
66-
for wf in ['sanger', 'mutect2', 'mutect2-bqsr']:
66+
for wf in ['sanger', 'mutect2']:
6767
subfolder = args.mode + '/' + wf
6868
if not include.get(wf): continue
6969
download(args.dump_path, 'snv', args.token, args.metadata_url, args.storage_url, include.get(wf), subfolder)

0 commit comments

Comments
 (0)