diff --git a/CHANGELOG.md b/CHANGELOG.md index c3cab8fd7f..98ba277453 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1340](https://github.com/nf-core/sarek/pull/1340) - Adds Azure test profiles and megatests. - [#1372](https://github.com/nf-core/sarek/pull/1372) - Add NCBench test profile for Agilent datasets - [#1409](https://github.com/nf-core/sarek/pull/1409) - Add params `modules_testdata_base_path` to test profile +- [#1448](https://github.com/nf-core/sarek/pull/1448) - Internal benchmarking of germline small variants ### Changed diff --git a/modules.json b/modules.json index bdd0e0a720..dae3f54f36 100644 --- a/modules.json +++ b/modules.json @@ -8,476 +8,689 @@ "ascat": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/annotate": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/mpileup": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["bam_ngscheckmate"] + "installed_by": [ + "bam_ngscheckmate" + ] }, "bcftools/sort": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/stats": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/index": { "branch": "master", "git_sha": "6278bf9afd4a4b2d00fa6052250e73da3d91546f", - "installed_by": ["modules"] + "installed_by": [ + "modules" + 
] }, "bwa/mem": { "branch": "master", "git_sha": "5908e575322666ccc33911a28b06e3f82260fe54", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/mem": { "branch": "master", "git_sha": "74363e1acc38eaedeede8d429477397c1a6f9e18", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "9437e6053dccf4aafa022bfd6e7e9de67e625af8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/fastq": { "branch": "master", "git_sha": "0997b47c93c06b49aa7b3fefda87e728312cf2ca", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/antitarget": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/batch": { "branch": "master", "git_sha": "76c858e2589c7762c68d7d14191d78f6db98a445", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/genemetrics": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/reference": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "controlfreec/assesssignificance": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff" }, "controlfreec/freec": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "controlfreec/freec2bed": { "branch": "master", "git_sha": 
"3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "controlfreec/freec2circos": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "controlfreec/makegraph2": { "branch": "master", "git_sha": "a7bae48d8bccfae99e3b862fa07bbd50a8df6b82", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deepvariant": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "dragmap/align": { "branch": "master", "git_sha": "9a7efcec9b21baf1c67342faa6b8d905c972207f", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/dragmap/align/dragmap-align.diff" }, "dragmap/hashtable": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/dragmap/hashtable/dragmap-hashtable.diff" }, "ensemblvep/download": { "branch": "master", "git_sha": "214d575774c172062924ad3564b4f66655600730", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ensemblvep/vep": { "branch": "master", "git_sha": "214d575774c172062924ad3564b4f66655600730", - "installed_by": ["vcf_annotate_ensemblvep", "modules"] + "installed_by": [ + "vcf_annotate_ensemblvep", + "modules" + ] }, "fastp": { "branch": "master", "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fgbio/callmolecularconsensusreads": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fgbio/fastqtobam": { "branch": 
"master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fgbio/groupreadsbyumi": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "freebayes": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/applybqsr": { "branch": "master", "git_sha": "af273ea6618c50e82c372abe18b0a225e84fe6f7", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/applyvqsr": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/baserecalibrator": { "branch": "master", "git_sha": "8a223e11d4e6deb36484e01891eae9c1cacb5f5d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/calculatecontamination": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/cnnscorevariants": { "branch": "master", "git_sha": "87e46f8fe8b056486a80c14b1d61e7cd6046bc06", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/createsequencedictionary": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/estimatelibrarycomplexity": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/filtermutectcalls": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/filtervarianttranches": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "gatk4/gatherbqsrreports": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/gatherpileupsummaries": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/genomicsdbimport": { "branch": "master", "git_sha": "194fca815cf594646e638fa5476acbcc296f1850", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/genotypegvcfs": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/getpileupsummaries": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/haplotypecaller": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/intervallisttobed": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/learnreadorientationmodel": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/markduplicates": { "branch": "master", "git_sha": "194fca815cf594646e638fa5476acbcc296f1850", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/mergemutectstats": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "194fca815cf594646e638fa5476acbcc296f1850", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/mutect2": { "branch": "master", "git_sha": 
"d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/variantrecalibrator": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4spark/applybqsr": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4spark/baserecalibrator": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4spark/markduplicates": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "happy/happy": { + "branch": "master", + "git_sha": "bf78e7d93f5d08ed464f74ba85a03b2ad6112f04", + "installed_by": [ + "modules" + ] }, "manta/germline": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "manta/somatic": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "manta/tumoronly": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mosdepth": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "msisensorpro/msisomatic": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "msisensorpro/scan": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": 
"b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ngscheckmate/ncm": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["bam_ngscheckmate"] + "installed_by": [ + "bam_ngscheckmate" + ] + }, + "rtgtools/rocplot": { + "branch": "master", + "git_sha": "94db1a531781edffada2cd3ea0913937a605eb48", + "installed_by": [ + "modules" + ] + }, + "rtgtools/vcfeval": { + "branch": "master", + "git_sha": "1274846bc23f0a07cc7e2bce8c6030923143da00", + "installed_by": [ + "modules" + ] }, "samblaster": { "branch": "master", "git_sha": "8ad1f37288c6f5a36871c169aa85614832152fd1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/bam2fq": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/collatefastq": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/convert": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/mpileup": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", 
- "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/applyvarcal": { "branch": "master", "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/bwamem": { "branch": "master", "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/dedup": { "branch": "master", "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/dnamodelapply": { "branch": "master", "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/dnascope": { "branch": "master", "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/gvcftyper": { "branch": "master", "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/haplotyper": { "branch": "master", "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/varcal": { "branch": "master", "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "snpeff/download": { "branch": "master", "git_sha": "214d575774c172062924ad3564b4f66655600730", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "snpeff/snpeff": { "branch": "master", "git_sha": "214d575774c172062924ad3564b4f66655600730", - "installed_by": ["modules", "vcf_annotate_snpeff"] + "installed_by": [ + "modules", + "vcf_annotate_snpeff" + ] }, "strelka/germline": { "branch": "master", 
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "strelka/somatic": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "svdb/merge": { "branch": "master", "git_sha": "8c498400bfa0f0e5c1750fab21496a27bc8d9ad0", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules", "vcf_annotate_snpeff"] + "installed_by": [ + "modules", + "vcf_annotate_snpeff" + ] }, "tabix/tabix": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["vcf_annotate_ensemblvep", "modules"] + "installed_by": [ + "vcf_annotate_ensemblvep", + "modules" + ] }, "tiddit/sv": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "e719354ba77df0a1bd310836aa2039b45c29d620", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "unzip": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "vcftools": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -486,35 +699,47 @@ "bam_ngscheckmate": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - 
"installed_by": ["subworkflows"]
+                    "installed_by": [
+                        "subworkflows"
+                    ]
                     },
                     "utils_nfvalidation_plugin": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     },
                     "vcf_annotate_ensemblvep": {
                         "branch": "master",
                         "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     },
                     "vcf_annotate_snpeff": {
                         "branch": "master",
                         "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     }
                 }
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/rtgtools/rocplot/main.nf b/modules/nf-core/rtgtools/rocplot/main.nf
new file mode 100644
index 0000000000..db90039a09
--- /dev/null
+++ b/modules/nf-core/rtgtools/rocplot/main.nf
@@ -0,0 +1,54 @@
+// Plot ROC curves with `rtg rocplot`, writing one PNG and one SVG image per task.
+// Both images are named after task.ext.prefix, or meta.id when no prefix is configured.
+process RTGTOOLS_ROCPLOT {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::rtg-tools=3.12.1"
+    // Galaxy depot image for Singularity (unless ext.singularity_pull_docker_container is set), biocontainers image otherwise
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/rtg-tools:3.12.1--hdfd78af_0':
+        'quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.png"), emit: png
+    tuple val(meta), path("*.svg"), emit: svg
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // task.memory expressed in gigabytes, passed to rtg as RTG_MEM
+    def avail_mem = task.memory.toGiga() + "G"
+    """
+    rtg RTG_MEM=${avail_mem} rocplot \\
+        ${args} \\
+        --png ${prefix}.png \\
+        --svg ${prefix}.svg \\
+        ${input}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        rtg-tools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}'))
+    END_VERSIONS
+    """
+
+    stub:
+    // Stub run: creates empty image files, but still calls `rtg version` for versions.yml
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.png
+    touch ${prefix}.svg
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        rtg-tools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}'))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/rtgtools/rocplot/meta.yml b/modules/nf-core/rtgtools/rocplot/meta.yml
new file mode 100644
index 0000000000..b46c490528
--- /dev/null
+++ b/modules/nf-core/rtgtools/rocplot/meta.yml
@@ -0,0 +1,47 @@
+name: "rtgtools_rocplot"
+description: Plot ROC curves from vcfeval ROC data files, either to an image, or an interactive GUI. The interactive GUI isn't possible for nextflow.
+keywords:
+  - rtgtools
+  - rocplot
+  - validation
+  - vcf
+tools:
+  - "rtgtools":
+      description: "RealTimeGenomics Tools -- Utilities for accurate VCF comparison and manipulation"
+      homepage: "https://www.realtimegenomics.com/products/rtg-tools"
+      documentation: "https://github.com/RealTimeGenomics/rtg-tools"
+      tool_dev_url: "https://github.com/RealTimeGenomics/rtg-tools"
+      licence: "['BSD']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: Input TSV ROC files created with RTGTOOLS_VCFEVAL
+      pattern: "*.tsv.gz"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - png:
+      type: file
+      description: The resulting rocplot in PNG format
+      pattern: "*.png"
+  - svg:
+      type: file
+      description: The resulting rocplot in SVG format
+      pattern: "*.svg"
+
+authors:
+  - "@nvnieuwk"
diff --git a/modules/nf-core/rtgtools/vcfeval/main.nf b/modules/nf-core/rtgtools/vcfeval/main.nf
new file mode 100644
index 0000000000..9e73e902d3
--- /dev/null
+++ b/modules/nf-core/rtgtools/vcfeval/main.nf
@@ -0,0 +1,72 @@
+// Compare called variants (query VCF) with a baseline (truth VCF) using `rtg vcfeval`.
+// vcfeval writes to ./output; `done` and `progress` are then moved up unchanged and every other result file is moved up as ${prefix}.<name>.
+process RTGTOOLS_VCFEVAL {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::rtg-tools=3.12.1"
+    // Galaxy depot image for Singularity (unless ext.singularity_pull_docker_container is set), biocontainers image otherwise
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/rtg-tools:3.12.1--hdfd78af_0':
+        'quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(query_vcf), path(query_vcf_tbi), path(truth_vcf), path(truth_vcf_tbi), path(truth_bed), path(evaluation_bed)
+    tuple val(meta2), path(sdf)
+
+    output:
+    tuple val(meta), path("*.tp.vcf.gz")              , emit: tp_vcf
+    tuple val(meta), path("*.tp.vcf.gz.tbi")          , emit: tp_tbi
+    tuple val(meta), path("*.fn.vcf.gz")              , emit: fn_vcf
+    tuple val(meta), path("*.fn.vcf.gz.tbi")          , emit: fn_tbi
+    tuple val(meta), path("*.fp.vcf.gz")              , emit: fp_vcf
+    tuple val(meta), path("*.fp.vcf.gz.tbi")          , emit: fp_tbi
+    tuple val(meta), path("*.tp-baseline.vcf.gz")     , emit: baseline_vcf
+    tuple val(meta), path("*.tp-baseline.vcf.gz.tbi") , emit: baseline_tbi
+    tuple val(meta), path("*.snp_roc.tsv.gz")         , emit: snp_roc
+    tuple val(meta), path("*.non_snp_roc.tsv.gz")     , emit: non_snp_roc
+    tuple val(meta), path("*.weighted_roc.tsv.gz")    , emit: weighted_roc
+    tuple val(meta), path("*.summary.txt")            , emit: summary
+    tuple val(meta), path("*.phasing.txt")            , emit: phasing
+    path "versions.yml"                               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ""
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Optional inputs: a BED adds its region option only when provided, and a VCF without
+    // an index gets one built with `rtg index` before vcfeval runs
+    def bed_regions = truth_bed ? "--bed-regions=${truth_bed}" : ""
+    def eval_regions = evaluation_bed ? "--evaluation-regions=${evaluation_bed}" : ""
+    def truth_index = truth_vcf_tbi ? "" : "rtg index ${truth_vcf}"
+    def query_index = query_vcf_tbi ? "" : "rtg index ${query_vcf}"
+    // task.memory expressed in gigabytes, passed to rtg as RTG_MEM
+    def avail_mem = task.memory.toGiga() + "G"
+
+    """
+    ${truth_index}
+    ${query_index}
+
+    rtg RTG_MEM=$avail_mem vcfeval \\
+        ${args} \\
+        --baseline=${truth_vcf} \\
+        ${bed_regions} \\
+        ${eval_regions} \\
+        --calls=${query_vcf} \\
+        --output=output \\
+        --template=${sdf} \\
+        --threads=${task.cpus}
+
+    cd output/
+    mv done progress ..
+    for f in * ; do mv "\$f" "../${prefix}.\$f" ; done
+    cd ..
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        rtg-tools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}'))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/rtgtools/vcfeval/meta.yml b/modules/nf-core/rtgtools/vcfeval/meta.yml
new file mode 100644
index 0000000000..ad7c2c01b0
--- /dev/null
+++ b/modules/nf-core/rtgtools/vcfeval/meta.yml
@@ -0,0 +1,114 @@
+name: "rtgtools_vcfeval"
+description: The VCFeval tool of RTG tools. It is used to evaluate called variants for agreement with a baseline variant set
+keywords:
+  - benchmarking
+  - vcf
+  - rtg-tools
+tools:
+  - "rtgtools":
+      description: "RealTimeGenomics Tools -- Utilities for accurate VCF comparison and manipulation"
+      homepage: "https://www.realtimegenomics.com/products/rtg-tools"
+      documentation: "https://github.com/RealTimeGenomics/rtg-tools"
+      tool_dev_url: "https://github.com/RealTimeGenomics/rtg-tools"
+      doi: ""
+      licence: "['BSD']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ] + - query_vcf: + type: file + description: A VCF with called variants to benchmark against the standard + pattern: "*.{vcf,vcf.gz}" + - query_vcf_tbi: + type: file + description: The index of the called VCF (optional) + pattern: "*.tbi" + - truth_vcf: + type: file + description: A standard VCF to compare against + pattern: "*.{vcf,vcf.gz}" + - truth_vcf_tbi: + type: file + description: The index of the standard VCF (optional) + pattern: "*.tbi" + - truth_bed: + type: file + description: A BED file containing the strict regions where VCFeval should only evaluate the fully overlapping variants (optional) + pattern: "*.bed" + - evaluation_bed: + type: file + description: A BED file containing the regions where VCFeval will evaluate every fully and partially overlapping variant (optional) + pattern: "*.bed" + - sdf: + type: file + description: The SDF (RTG Sequence Data File) folder of the reference genome + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tp_vcf: + type: file + description: A VCF file for the true positive variants + pattern: "*.tp.vcf.gz" + - tp_tbi: + type: file + description: The index of the VCF file for the true positive variants + pattern: "*.tp.vcf.gz.tbi" + - fn_vcf: + type: file + description: A VCF file for the false negative variants + pattern: "*.fn.vcf.gz" + - fn_tbi: + type: file + description: The index of the VCF file for the false negative variants + pattern: "*.fn.vcf.gz.tbi" + - fp_vcf: + type: file + description: A VCF file for the false positive variants + pattern: "*.fp.vcf.gz" + - fp_tbi: + type: file + description: The index of the VCF file for the false positive variants + pattern: "*.fp.vcf.gz.tbi" + - baseline_vcf: + type: file + description: A VCF file for the true positive variants from the baseline + pattern: "*.tp-baseline.vcf.gz" + - baseline_tbi: + type: file + description: The index of the VCF file for the true positive variants from the baseline + pattern: "*.tp-baseline.vcf.gz.tbi" + - snp_roc: + type: file + description: TSV files containing ROC data for the SNPs + pattern: "*.snp_roc.tsv.gz" + - non_snp_roc: + type: file + description: TSV files containing ROC data for all variants except SNPs + pattern: "*.non_snp_roc.tsv.gz" + - weighted_roc: + type: file + description: TSV files containing weighted ROC data for all variants + pattern: "*.weighted_roc.tsv.gz" + - summary: + type: file + description: A TXT file containing the summary of the evaluation + pattern: "*.summary.txt" + - phasing: + type: file + description: A TXT file containing the data on the phasing + pattern: "*.phasing.txt" + +authors: + - "@nvnieuwk" diff --git a/nextflow.config b/nextflow.config index e9dabd1136..b04f1eacea 100644 --- a/nextflow.config +++ b/nextflow.config @@ -98,6 +98,9 @@ params { vep_spliceregion = null // spliceregion plugin
disabled within VEP
     vep_version = "110.0-0" // Should be updated when we update VEP, needs this to get full path to some plugins
 
+    // Special
+    benchmark = false // Experimental benchmarking of germline small variants against a truth set (opt-in)
+
     // MultiQC options
     multiqc_config = null
     multiqc_title = null
diff --git a/subworkflows/local/vcf_benchmark_small_variants/main.nf b/subworkflows/local/vcf_benchmark_small_variants/main.nf
new file mode 100644
index 0000000000..2dd942395e
--- /dev/null
+++ b/subworkflows/local/vcf_benchmark_small_variants/main.nf
@@ -0,0 +1,48 @@
+//
+// SMALL_GERMLINE_BENCHMARK: SUBWORKFLOW FOR SMALL GERMLINE VARIANTS
+//
+// Runs RTGTOOLS_VCFEVAL on every test VCF against the truth VCF and gathers the summary reports.
+//
+
+include { RTGTOOLS_VCFEVAL } from '../../../modules/nf-core/rtgtools/vcfeval/main'
+
+
+workflow VCF_BENCHMARK_SMALL_VARIANTS {
+    take:
+    ch_test  // channel: test vcf coming from pipeline [val(meta), test.vcf.gz, test.vcf.gz.tbi]
+    ch_truth // channel: truth vcf [val(meta), truth.vcf.gz, truth.vcf.gz.tbi]
+    ch_bed   // channel: bed file [val(meta), target.bed] //TODO: is optional for rtgvcfeval -> remove?
+
+    main:
+    versions = Channel.empty()
+    summary_reports = Channel.empty()
+
+    // Pair every test VCF with the truth VCF and the target BED. Only the test sample's meta is kept;
+    // the meta maps of the truth and BED channels are dropped.
+    ch_vcfeval_in = ch_test
+        .combine(ch_truth.map { meta, truth_vcf, truth_tbi -> [ truth_vcf, truth_tbi ] })
+        .combine(ch_bed.map { meta, bed -> [ bed ] })
+        .map { meta, vcf, tbi, truth_vcf, truth_tbi, bed ->
+            // trailing [] = no evaluation-regions BED
+            [ meta, vcf, tbi, truth_vcf, truth_tbi, bed, [] ]
+        }
+
+    // apply rtgtools eval method
+    // NOTE(review): [ [], [] ] supplies no reference SDF to the module's `--template=${sdf}` option;
+    // confirm whether an SDF channel has to be added here.
+    RTGTOOLS_VCFEVAL(ch_vcfeval_in, [ [], [] ])
+    versions = versions.mix(RTGTOOLS_VCFEVAL.out.versions.first())
+
+    // collect summary reports
+    RTGTOOLS_VCFEVAL.out.summary
+        .map { meta, summary -> tuple([vartype: meta.vartype] + [benchmark_tool: "rtgtools"], summary) }
+        .groupTuple()
+        .set{ report }
+
+    summary_reports = summary_reports.mix(report)
+
+    emit:
+    versions
+    summary_reports
+
+}
diff --git a/subworkflows/local/vcf_validate_small_variants/main.nf b/subworkflows/local/vcf_validate_small_variants/main.nf
new file mode 100644
index 0000000000..a92377d259
--- /dev/null
+++ b/subworkflows/local/vcf_validate_small_variants/main.nf
@@ -0,0 +1,28 @@
+//
+// Validation against truth.
+//
+
+include { RTGTOOLS_VCFEVAL } from '../../../modules/nf-core/rtgtools/vcfeval/main'
+// or do it with hap.py???
+
+workflow VCF_VALIDATE_SMALL_VARIANTS {
+
+    take:
+    ch_vcfeval_in // channel: [ val(meta), query_vcf, query_vcf_tbi, truth_vcf, truth_vcf_tbi, truth_bed, evaluation_bed ]
+    ch_sdf        // channel: [ val(meta2), sdf ]
+
+    main:
+    versions = Channel.empty()
+    // query_vcf = generated by sarek run
+    // truth_vcf = denoted in test_data or somewhere
+
+    // if benchmark = true then do "normal" sarek run and compare to truth sample here
+    // needs param which truth sample was used maybe?
+    // get truth, normalize truth
+
+    RTGTOOLS_VCFEVAL ( ch_vcfeval_in, ch_sdf )
+    versions = versions.mix(RTGTOOLS_VCFEVAL.out.versions.first())
+
+    emit:
+    versions // channel: [ path(versions.yml) ]
+}
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index c94ee48918..c2923e2f1c 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -217,11 +217,15 @@ include { POST_VARIANTCALLING } from '../subworkflows/lo
 include { VCF_QC_BCFTOOLS_VCFTOOLS                    } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main'
 
 // Sample QC on CRAM files
-include { CRAM_SAMPLEQC                      } from '../subworkflows/local/cram_sampleqc/main'
+include { CRAM_SAMPLEQC                               } from '../subworkflows/local/cram_sampleqc/main'
 
 // Annotation
 include { VCF_ANNOTATE_ALL                            } from '../subworkflows/local/vcf_annotate_all/main'
 
+// Validation (experimental)
+include { VCF_BENCHMARK_SMALL_VARIANTS                } from '../subworkflows/local/vcf_benchmark_small_variants/main'
+include { VCF_VALIDATE_SMALL_VARIANTS                 } from '../subworkflows/local/vcf_validate_small_variants/main'
+
 // MULTIQC
 include { MULTIQC                                     } from '../modules/nf-core/multiqc/main'
 
@@ -1077,6 +1081,23 @@ workflow SAREK {
         vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all)
         vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all)
 
+        if (params.benchmark) {
+            // NOTE(review): the pipeline has no truth VCF, target BED or reference SDF inputs yet.
+            // The placeholders below are empty, so nothing is evaluated until they are replaced.
+            ch_benchmark_truth = Channel.empty() // [ val(meta), truth.vcf.gz, truth.vcf.gz.tbi ]
+            ch_benchmark_bed   = Channel.empty() // [ val(meta), target.bed ]
+            ch_benchmark_sdf   = Channel.empty() // [ val(meta2), sdf ]
+
+            // Append truth VCF + index, truth BED and an empty evaluation BED to each germline VCF tuple
+            // NOTE(review): RTGTOOLS_VCFEVAL expects [ meta, vcf, tbi, ... ] - confirm vcf_all carries an index
+            ch_benchmark_vcfeval_in = BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all
+                .combine(ch_benchmark_truth.map { meta, vcf, tbi -> [ vcf, tbi ] })
+                .combine(ch_benchmark_bed.map { meta, bed -> [ bed, [] ] })
+
+            VCF_VALIDATE_SMALL_VARIANTS(ch_benchmark_vcfeval_in, ch_benchmark_sdf)
+            VCF_BENCHMARK_SMALL_VARIANTS(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all, ch_benchmark_truth, ch_benchmark_bed)
+        }
+
         // QC
         VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_annotate, intervals_bed_combined)