diff --git a/modules/nf-core/pbmarkdup/environment.yml b/modules/nf-core/pbmarkdup/environment.yml
new file mode 100644
index 00000000000..7e5a3099a71
--- /dev/null
+++ b/modules/nf-core/pbmarkdup/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::pbmarkdup=1.2.0
diff --git a/modules/nf-core/pbmarkdup/main.nf b/modules/nf-core/pbmarkdup/main.nf
new file mode 100644
index 00000000000..20da004d21b
--- /dev/null
+++ b/modules/nf-core/pbmarkdup/main.nf
@@ -0,0 +1,85 @@
+process PBMARKDUP {
+    tag "$meta.id"
+    label "process_high"
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pbmarkdup:1.2.0--h9ee0642_0' :
+        'biocontainers/pbmarkdup:1.2.0--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("${prefix}.${suffix}"), emit: markduped
+    tuple val(meta), path("${dupfile_name}")    , emit: dupfile  , optional: true
+    tuple val(meta), path("*.pbmarkdup.log")    , emit: log      , optional: true
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // The output keeps the extension of the first input, so several input types are supported
+    suffix = input[0].getExtension()
+    // Value given to `--dup-file <name>` in ext.args; empty when the option or its value is missing
+    def dup_matcher = args =~ /--dup-file\s+(\S+)/
+    dupfile_name = dup_matcher.find() ? dup_matcher.group(1) : ''
+    // stdout is redirected to a log file only when `--log-level` is part of ext.args
+    def log_args = args.contains('--log-level') ? " > ${prefix}.pbmarkdup.log" : ''
+    def input_names = input.collect { it.getName() }
+    def file_list = input_names.join(' ')
+    def output_name = "${prefix}.${suffix}".toString() // plain String so List.contains() can match it
+
+    // Check file name collisions between input, output, and duplicate file.
+    // Whole names are compared: a substring test on the joined list would wrongly reject
+    // e.g. input `mytest.bam` when the output is `test.bam`.
+    if (input_names.contains(output_name))
+        error """Output file `${prefix}.${suffix}` conflicts with an input file.
+        Please change the output `$prefix` or input file names."""
+    if (dupfile_name) {
+        if (input_names.contains(dupfile_name))
+            error """Duplicate file `$dupfile_name` conflicts with an input file.
+            Please change the duplicate file name `$dupfile_name` or input file names."""
+
+        if (dupfile_name == output_name)
+            error """Duplicate file `$dupfile_name` cannot be the same as the output file name.
+            Please change the duplicate file name `$dupfile_name` or output prefix `$prefix`."""
+    }
+
+    """
+    pbmarkdup \\
+        -j ${task.cpus} \\
+        ${file_list} \\
+        ${prefix}.${suffix} \\
+        $args \\
+        ${log_args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pbmarkdup: \$(echo \$(pbmarkdup --version 2>&1) | awk 'BEGIN{FS=" "}{print \$2}')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    suffix = input[0].getExtension() // To allow multiple input types
+    def dup_matcher = args =~ /--dup-file\s+(\S+)/
+    dupfile_name = dup_matcher.find() ? dup_matcher.group(1) : ''
+    // Create the optional outputs too when ext.args requests them, matching the declared channels
+    def touch_dupfile = dupfile_name ? "touch ${dupfile_name}" : ''
+    def touch_log = args.contains('--log-level') ? "touch ${prefix}.pbmarkdup.log" : ''
+    """
+    touch ${prefix}.${suffix}
+    ${touch_dupfile}
+    ${touch_log}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pbmarkdup: \$(echo \$(pbmarkdup --version 2>&1) | awk 'BEGIN{FS=" "}{print \$2}')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/pbmarkdup/meta.yml b/modules/nf-core/pbmarkdup/meta.yml
new file mode 100644
index 00000000000..59a1de05425
--- /dev/null
+++ b/modules/nf-core/pbmarkdup/meta.yml
@@ -0,0 +1,90 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "pbmarkdup"
+description: |
+  Takes one or multiple sequencing chips of an amplified library as HiFi reads and marks or removes
+  duplicates.
+keywords: + - markdup + - bam + - fastq + - fasta +tools: + - pbmarkdup: + description: | + pbmarkdup identifies and marks duplicate reads in PacBio HiFi (CCS) data. It clusters + highly similar CCS reads to detect PCR duplicates and flags them in the output files + (BAM, FASTQ, FASTA) (duplicate bit 0x400), + optionally removing duplicates. + homepage: https://github.com/PacificBiosciences/pbmarkdup + documentation: https://github.com/PacificBiosciences/pbmarkdup + licence: ["BSD-3-Clause"] + identifier: biotools:pbmarkdup +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - input: + type: file + description: | + Sequencing reads in BAM, FASTQ, or FASTA format. + pattern: "*.{bam,f*a,/.*f.*\\.gz/}" + ontologies: + - edam: http://edamontology.org/format_2546 # FASTA-like + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: "http://edamontology.org/format_2572" # BAM +output: + markduped: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.${suffix}: + type: file + description: | + Duplicate-marked sequencing reads in the same format as the input file. + pattern: "*.{bam,f*a,/.*f.*\\.gz/}" + ontologies: + - edam: http://edamontology.org/format_2546 # FASTA-like + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: "http://edamontology.org/format_2572" # BAM + dupfile: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${dupfile_name}: + type: file + description: | + (Optional) File listing duplicate reads (specified with --dup-file). 
+          pattern: "*.{bam,f*a,/.*f.*\\.gz/}"
+          ontologies:
+            - edam: http://edamontology.org/format_2546 # FASTA-like
+            - edam: "http://edamontology.org/format_1930" # FASTQ
+            - edam: "http://edamontology.org/format_2572" # BAM
+  log:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.pbmarkdup.log":
+          type: file
+          description: |
+            Log file generated by pbmarkdup (if --log-level is specified).
+          pattern: "*.pbmarkdup.log"
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@sainsachiko"
+maintainers:
+  - "@sainsachiko"
diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test b/modules/nf-core/pbmarkdup/tests/main.nf.test
new file mode 100644
index 00000000000..dbe78c3517c
--- /dev/null
+++ b/modules/nf-core/pbmarkdup/tests/main.nf.test
@@ -0,0 +1,133 @@
+
+nextflow_process {
+
+    name "Test Process PBMARKDUP"
+    script "../main.nf"
+    process "PBMARKDUP"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "pbmarkdup"
+
+    config './nextflow.config'
+
+    test("deilephila porcellus - fasta") {
+
+        when {
+
+            params {
+                pbmarkdup_args = "--clobber"
+            }
+
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [ id:'test' ], // meta map
+                        file(params.modules_testdata_base_path + 'genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("acropora cervicornis - bam - multiple tests with dupfile and log") {
+        when {
+            // NOTE(review): `${prefix}` looks undefined in this params scope - the snapshot records the dup file as "null.dup.bam"; consider a literal name and regenerate the snapshot.
+            params {
+                pbmarkdup_args = "--clobber --dup-file ${prefix}.dup.bam --log-level INFO"
+            }
+
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [ id:'test' ], // meta map
+                        [ file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true),
+                          file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true)
+                        ]
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("acropora cervicornis - bam - multiple tests remove duplicates") {
+        when {
+
+            params {
+                pbmarkdup_args = "--clobber --rmdup"
+            }
+
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [ id:'test' ], // meta map
+                        [ file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true),
+                          file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true)
+                        ]
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("deilephila porcellus - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                pbmarkdup_args = ""
+            }
+
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [ id:'test' ], // meta map
+                        file(params.modules_testdata_base_path + 'genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test.snap b/modules/nf-core/pbmarkdup/tests/main.nf.test.snap
new file mode 100644
index 00000000000..705b7dd4f2a
--- /dev/null
+++ b/modules/nf-core/pbmarkdup/tests/main.nf.test.snap
@@ -0,0 +1,202 @@
+{
+    "acropora cervicornis - bam - multiple tests remove duplicates": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        
"test.bam:md5,86e22a794d904cc48cb3758a03883ba1" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ], + "dupfile": [ + + ], + "log": [ + + ], + "markduped": [ + [ + { + "id": "test" + }, + "test.bam:md5,86e22a794d904cc48cb3758a03883ba1" + ] + ], + "versions": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-11-27T22:25:53.428359" + }, + "acropora cervicornis - bam - multiple tests with dupfile and log": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,86e22a794d904cc48cb3758a03883ba1" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "null.dup.bam:md5,3b74225ad5f7e9e1cbafc45132ad82fb" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.pbmarkdup.log:md5,99987a1331d01b59aa3b5ccd1c787906" + ] + ], + "3": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ], + "dupfile": [ + [ + { + "id": "test" + }, + "null.dup.bam:md5,3b74225ad5f7e9e1cbafc45132ad82fb" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.pbmarkdup.log:md5,99987a1331d01b59aa3b5ccd1c787906" + ] + ], + "markduped": [ + [ + { + "id": "test" + }, + "test.bam:md5,86e22a794d904cc48cb3758a03883ba1" + ] + ], + "versions": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-11-27T22:25:23.374664" + }, + "deilephila porcellus - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ], + "dupfile": [ + + ], + "log": [ + + ], + "markduped": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": 
"25.04.2"
+        },
+        "timestamp": "2025-11-27T22:26:16.491708"
+    },
+    "deilephila porcellus - fasta": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,087cee5291f8d728a62b91765b64af35"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db"
+                ],
+                "dupfile": [
+                    
+                ],
+                "log": [
+                    
+                ],
+                "markduped": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.fa:md5,087cee5291f8d728a62b91765b64af35"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.2"
+        },
+        "timestamp": "2025-11-27T22:47:33.595865"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/pbmarkdup/tests/nextflow.config b/modules/nf-core/pbmarkdup/tests/nextflow.config
new file mode 100644
index 00000000000..dc9b092f013
--- /dev/null
+++ b/modules/nf-core/pbmarkdup/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: PBMARKDUP {
+        ext.args = { "${params.pbmarkdup_args}" } // forwards params.pbmarkdup_args to the process as ext.args
+    }
+}