-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathvalidate-bam.wdl
More file actions
93 lines (83 loc) · 2.65 KB
/
validate-bam.wdl
File metadata and controls
93 lines (83 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
version 1.0
## Copyright Broad Institute, 2017
##
## This WDL performs format validation on SAM/BAM files in a list
##
## Requirements/expectations :
## - One or more SAM or BAM files to validate
## - Optionally, a validation mode (SUMMARY or VERBOSE) set in inputs.json; defaults to SUMMARY
##
## Outputs:
## - Set of .txt files containing the validation reports, one per input file
##
## Cromwell version support
## - Successfully tested on v32
## - Does not work on versions < v23 due to output syntax
##
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
## For program versions, see docker containers.
##
## LICENSING :
## This script is released under the WDL source code license (BSD-3) (see LICENSE in
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
## be subject to different licenses. Users are responsible for checking that they are
## authorized to run all programs before running this script. Please see the docker
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
## licensing information pertaining to the included programs.
# WORKFLOW DEFINITION
workflow ValidateBamsWf {
  # Validates each SAM/BAM file in `bam_array` independently and collects
  # one validation report per input file.
  input {
    # SAM or BAM files to validate
    Array[File] bam_array
    # Docker image the validation task runs in.
    # NOTE(review): ":latest" is not pinned, so runs may not be reproducible
    # over time - consider overriding with a fixed tag in inputs.json.
    String gatk_docker = "broadinstitute/gatk:latest"
    # Path to the gatk launcher inside the docker image
    String gatk_path = "/gatk/gatk"
  }

  # Process the input files in parallel
  scatter (input_bam in bam_array) {
    # Strip the directory and a trailing ".bam" or ".sam" extension.
    # basename(input_bam, ".bam") only handled BAM inputs, so a SAM input
    # kept its extension in the report name.
    String bam_basename = sub(basename(input_bam), "\\.(bam|sam)$", "")

    # Run the validation
    call ValidateBAM {
      input:
        input_bam = input_bam,
        output_basename = bam_basename + ".validation",
        docker = gatk_docker,
        gatk_path = gatk_path
    }
  }

  # Outputs that will be retained when execution is complete
  output {
    # One report per element of bam_array, gathered from the scatter
    Array[File] validation_reports = ValidateBAM.validation_report
  }
}
# TASK DEFINITIONS
# Validate a SAM or BAM using Picard ValidateSamFile
task ValidateBAM {
  # Runs ValidateSamFile (through the gatk launcher) on a single SAM/BAM file
  # and writes the validation report to "<output_basename>_<mode>.txt".
  input {
    # Command parameters
    # File to validate
    File input_bam
    # Prefix of the report file name
    String output_basename
    # Value passed to --MODE; SUMMARY is used when this is not supplied
    String? validation_mode
    # Path to the gatk launcher inside the docker image
    String gatk_path

    # Runtime parameters
    String docker
    Int machine_mem_gb = 4
    # Spelling kept as-is: this input name is part of the task's interface
    Int addtional_disk_space_gb = 50
  }

  # Resolve the optional mode once so that the report file name and the
  # --MODE argument always agree, including when validation_mode is unset.
  String effective_mode = select_first([validation_mode, "SUMMARY"])

  # Disk request: input size rounded up to whole GB plus the extra headroom
  Int disk_size = ceil(size(input_bam, "GB")) + addtional_disk_space_gb
  String output_name = "${output_basename}_${effective_mode}.txt"

  command {
    ${gatk_path} \
      ValidateSamFile \
      --INPUT ${input_bam} \
      --OUTPUT ${output_name} \
      --MODE ${effective_mode}
  }

  runtime {
    docker: docker
    memory: machine_mem_gb + " GB"
    disks: "local-disk " + disk_size + " HDD"
  }

  output {
    # The report written by --OUTPUT above
    File validation_report = "${output_name}"
  }
}