Skip to content

Commit 344ff95

Browse files
committed
update bed files
1 parent d4c786f commit 344ff95

23 files changed

+44
-35
lines changed

data/beds/cds.bed.gz

-215 KB
Binary file not shown.

data/beds/cds.nomerge.bed.gz

2.09 MB
Binary file not shown.

data/beds/exon.bed.gz

-500 KB
Binary file not shown.

data/beds/exon.nomerge.bed.gz

8.78 MB
Binary file not shown.

data/beds/gene.bed.gz

-729 KB
Binary file not shown.

data/beds/intron.bed.gz

-1.88 MB
Binary file not shown.

data/beds/lncRNA.bed.gz

133 KB
Binary file not shown.

data/beds/lncRNA.promoter.bed.gz

206 KB
Binary file not shown.

data/beds/lncRNA_ss.bed.gz

664 KB
Binary file not shown.

data/beds/miRNA.bed.gz

16 KB
Binary file not shown.

data/beds/miRNA.promoter.bed.gz

16.4 KB
Binary file not shown.

data/beds/pc_ss.bed.gz

32.4 MB
Binary file not shown.

data/beds/protein_coding.bed.gz

156 KB
Binary file not shown.

(file path missing from this capture)

345 KB
Binary file not shown.

data/beds/protein_coding_ss.bed.gz

2.74 MB
Binary file not shown.

data/beds/start_codon.bed.gz

-301 KB
Binary file not shown.

data/beds/stop_codon.bed.gz

-383 KB
Binary file not shown.

data/beds/utr3.bed.gz

586 KB
Binary file not shown.

data/beds/utr3.nomerge.bed.gz

1.16 MB
Binary file not shown.

data/beds/utr5.bed.gz

443 KB
Binary file not shown.

data/beds/utr5.nomerge.bed.gz

1.01 MB
Binary file not shown.

get-stats.py

+41-34
Original file line number | Diff line number | Diff line change
@@ -19,46 +19,50 @@
1919
variant_calling_stats_fields = {
2020
'donor_id': 'donor_id',
2121
'study_id': 'study_id',
22-
'gender': 'gender',
22+
# 'gender': 'gender',
2323
'experimental_strategy': 'experimental_strategy',
24-
'geno_infer_gender': 'geno_infer_gender',
25-
'normal_aligned': 'flags.normal_aligned',
26-
'tumour_aligned': 'flags.tumour_aligned',
27-
'sanger_called': 'flags.sanger_called',
28-
'mutect2_called': 'flags.mutect2_called',
29-
'is_pcawg': 'flags.is_pcawg',
24+
# 'geno_infer_gender': 'geno_infer_gender',
25+
# 'normal_aligned': 'flags.normal_aligned',
26+
# 'tumour_aligned': 'flags.tumour_aligned',
27+
# 'sanger_called': 'flags.sanger_called',
28+
# 'mutect2_called': 'flags.mutect2_called',
29+
# 'is_pcawg': 'flags.is_pcawg',
3030
'normal_sample_id': 'normal.sample_id',
31-
'normal_file_size_gb': 'normal.alignment.file_size',
32-
'normal_error_rate': 'normal.alignment.error_rate',
33-
'normal_duplicate_rate': 'normal.alignment.duplicate_rate',
34-
'normal_pairs_on_different_chromosomes': 'normal.alignment.pairs_on_different_chromosomes',
35-
'normal_pairs_on_different_chromosomes_rate': 'normal.alignment.pairs_on_different_chromosomes_rate',
31+
'normal_submitter_sample_id': 'normal.submitterSampleId',
32+
# 'normal_file_size_gb': 'normal.alignment.file_size',
33+
# 'normal_error_rate': 'normal.alignment.error_rate',
34+
# 'normal_duplicate_rate': 'normal.alignment.duplicate_rate',
35+
# 'normal_pairs_on_different_chromosomes': 'normal.alignment.pairs_on_different_chromosomes',
36+
# 'normal_pairs_on_different_chromosomes_rate': 'normal.alignment.pairs_on_different_chromosomes_rate',
3637
'normal_oxoQ_score': 'normal.alignment.oxoQ_score',
37-
'normal_avg_depth': 'normal.sanger.contamination.avg_depth',
38-
'normal_estimated_coverage': 'normal.alignment.estimated_coverage',
39-
'normal_sanger_contamination': 'normal.sanger.contamination.contamination',
40-
'normal_mutect2_contamination': 'normal.mutect2.contamination.contamination',
41-
'normal_properly_paired_reads': 'normal.alignment.properly_paired_reads',
42-
'normal_total_reads': 'normal.alignment.total_reads',
38+
'normal_insert_size_mean': 'normal.alignment.average_insert_size',
39+
# 'normal_avg_depth': 'normal.sanger.contamination.avg_depth',
40+
# 'normal_estimated_coverage': 'normal.alignment.estimated_coverage',
41+
# 'normal_sanger_contamination': 'normal.sanger.contamination.contamination',
42+
# 'normal_mutect2_contamination': 'normal.mutect2.contamination.contamination',
43+
# 'normal_properly_paired_reads': 'normal.alignment.properly_paired_reads',
44+
# 'normal_total_reads': 'normal.alignment.total_reads',
4345
'tumour_sample_id': 'tumour.sample_id',
44-
'tumour_file_size_gb': 'tumour.alignment.file_size',
45-
'tumour_error_rate': 'tumour.alignment.error_rate',
46-
'tumour_duplicate_rate': 'tumour.alignment.duplicate_rate',
47-
'tumour_pairs_on_different_chromosomes': 'tumour.alignment.pairs_on_different_chromosomes',
48-
'tumour_pairs_on_different_chromosomes_rate': 'tumour.alignment.pairs_on_different_chromosomes_rate',
46+
'tumour_submitter_sample_id': 'tumour.submitterSampleId',
47+
# 'tumour_file_size_gb': 'tumour.alignment.file_size',
48+
# 'tumour_error_rate': 'tumour.alignment.error_rate',
49+
# 'tumour_duplicate_rate': 'tumour.alignment.duplicate_rate',
50+
# 'tumour_pairs_on_different_chromosomes': 'tumour.alignment.pairs_on_different_chromosomes',
51+
# 'tumour_pairs_on_different_chromosomes_rate': 'tumour.alignment.pairs_on_different_chromosomes_rate',
4952
'tumour_oxoQ_score': 'tumour.alignment.oxoQ_score',
50-
'tumour_avg_depth': 'tumour.sanger.contamination.avg_depth',
51-
'tumour_estimated_coverage': 'tumour.alignment.estimated_coverage',
52-
'tumour_sanger_contamination': 'tumour.sanger.contamination.contamination',
53-
'tumour_mutect2_contamination': 'tumour.mutect2.contamination.contamination',
54-
'tumour_properly_paired_reads': 'tumour.alignment.properly_paired_reads',
55-
'tumour_total_reads': 'tumour.alignment.total_reads',
56-
'ascat_normal_contamination': 'tumour.sanger.ascat_metrics.NormalContamination',
57-
'ascat_ploidy': 'tumour.sanger.ascat_metrics.Ploidy',
53+
'tumour_insert_size_mean': 'tumour.alignment.average_insert_size'
54+
# 'tumour_avg_depth': 'tumour.sanger.contamination.avg_depth',
55+
# 'tumour_estimated_coverage': 'tumour.alignment.estimated_coverage',
56+
# 'tumour_sanger_contamination': 'tumour.sanger.contamination.contamination',
57+
# 'tumour_mutect2_contamination': 'tumour.mutect2.contamination.contamination',
58+
# 'tumour_properly_paired_reads': 'tumour.alignment.properly_paired_reads',
59+
# 'tumour_total_reads': 'tumour.alignment.total_reads',
60+
# 'ascat_normal_contamination': 'tumour.sanger.ascat_metrics.NormalContamination',
61+
# 'ascat_ploidy': 'tumour.sanger.ascat_metrics.Ploidy',
5862
# 'ascat_goodnessOfFit': 'tumour.sanger.ascat_metrics.goodnessOfFit',
5963
# 'ascat_psi': 'tumour.sanger.ascat_metrics.psi',
60-
'ascat_purity': 'tumour.sanger.ascat_metrics.rho',
61-
'mutect2_callable': 'tumour.mutect2.callable'
64+
# 'ascat_purity': 'tumour.sanger.ascat_metrics.rho',
65+
# 'mutect2_callable': 'tumour.mutect2.callable'
6266

6367
# 'cgpPindel_cpu_hours': 'tumour.sanger.timing.cgpPindel.cpu_hours',
6468
# 'cgpPindel_max_memory_usage_per_core': 'tumour.sanger.timing.cgpPindel.maximum_memory_usage_per_core',
@@ -195,6 +199,7 @@ def process_qc_metrics(song_dump, variant_calling_stats):
195199
if not analysis['samples'][0]['specimen']['tumourNormalDesignation'] == 'Tumour': continue
196200
studyId = analysis['studyId']
197201
sampleId = analysis['samples'][0]['sampleId']
202+
submitterSampleId = analysis['samples'][0]['submitterSampleId']
198203
matchedNormal = analysis['samples'][0]['matchedNormalSubmitterSampleId']
199204
experimental_strategy = analysis['experiment']['experimental_strategy'] if analysis['experiment'].get('experimental_strategy') else analysis['experiment']['library_strategy']
200205
normal_sample_id = '_'.join([studyId, experimental_strategy, matchedNormal])
@@ -228,6 +233,7 @@ def process_qc_metrics(song_dump, variant_calling_stats):
228233
},
229234
'tumour': {
230235
'sample_id': sampleId,
236+
'submitterSampleId': submitterSampleId,
231237
'alignment': {},
232238
'sanger': {
233239
'contamination': {},
@@ -332,7 +338,8 @@ def process_qc_metrics(song_dump, variant_calling_stats):
332338
metrics = get_extra_metrics(fname, extra_metrics, metrics)
333339

334340
for sa in sample_map[normal_sample_id]:
335-
variant_calling_stats[sa]['normal']['sample_id'] = analysis['samples'][0]['sampleId']
341+
variant_calling_stats[sa]['normal']['sample_id'] = analysis['samples'][0]['sampleId']
342+
variant_calling_stats[sa]['normal']['submitterSampleId'] = analysis['samples'][0]['submitterSampleId']
336343
variant_calling_stats[sa]['normal']['alignment'].update(metrics)
337344
variant_calling_stats[sa]['flags']['normal_aligned'] = True
338345
elif fl['dataType'] == 'OxoG Metrics':

gql_qc_report.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,12 @@ def process(gql_dump, analysisType, suppress):
2727
suppress_dict['donorId'] = analysis['donors'][0]['donorId']
2828
suppress_dict['sampleId'] = analysis['donors'][0]['specimens'][0]['samples'][0]['sampleId']
2929
suppress_dict['tumourNormalDesignation'] = analysis['donors'][0]['specimens'][0]['tumourNormalDesignation']
30-
suppress_dict['experimental_strategy'] = analysis['experiment']['experimental_strategy']
30+
suppress_dict['experimental_strategy'] = analysis['experiment']['experimental_strategy'] if analysis['experiment'].get('experimental_strategy') else analysis['experiment']['library_strategy']
3131
suppress_dict['run_input_analysisId'] = analysis.get('analysisId')
3232
suppress_dict['run_input_analysisType'] = analysis.get('analysisType')
3333

34+
if suppress_dict['tumourNormalDesignation'] == 'Normal' and suppress_dict['run_input_analysisType'] == 'sequencing_alignment': continue
35+
3436
for wf in wf_repo:
3537
complete_count = 0
3638
complete_latest = 0

0 commit comments

Comments
 (0)