From 2ad3c794d5a5d44a8a3003d0abf1462914b87d00 Mon Sep 17 00:00:00 2001 From: sainsachiko Date: Tue, 25 Nov 2025 17:28:33 +0000 Subject: [PATCH 1/8] Add module pbmarkdup --- modules/nf-core/pbmarkdup/environment.yml | 7 ++ modules/nf-core/pbmarkdup/main.nf | 50 +++++++++ modules/nf-core/pbmarkdup/meta.yml | 58 ++++++++++ modules/nf-core/pbmarkdup/tests/main.nf.test | 91 ++++++++++++++++ .../nf-core/pbmarkdup/tests/main.nf.test.snap | 101 ++++++++++++++++++ .../nf-core/pbmarkdup/tests/nextflow.config | 5 + 6 files changed, 312 insertions(+) create mode 100644 modules/nf-core/pbmarkdup/environment.yml create mode 100644 modules/nf-core/pbmarkdup/main.nf create mode 100644 modules/nf-core/pbmarkdup/meta.yml create mode 100644 modules/nf-core/pbmarkdup/tests/main.nf.test create mode 100644 modules/nf-core/pbmarkdup/tests/main.nf.test.snap create mode 100644 modules/nf-core/pbmarkdup/tests/nextflow.config diff --git a/modules/nf-core/pbmarkdup/environment.yml b/modules/nf-core/pbmarkdup/environment.yml new file mode 100644 index 000000000000..4484c5763827 --- /dev/null +++ b/modules/nf-core/pbmarkdup/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/sanger-tol/nf-core-modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::pbmarkdup==1.2.0 \ No newline at end of file diff --git a/modules/nf-core/pbmarkdup/main.nf b/modules/nf-core/pbmarkdup/main.nf new file mode 100644 index 000000000000..e6640bd1e900 --- /dev/null +++ b/modules/nf-core/pbmarkdup/main.nf @@ -0,0 +1,50 @@ +process PBMARKDUP { + tag "$meta.id" + label "process_high" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pbmarkdup:1.2.0--h9ee0642_0' : + 'biocontainers/pbmarkdup:1.2.0--h9ee0642_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.{bam,f*a,/.*f.*\\.gz/}") , emit: markduped + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = input.getExtension() + + """ + pbmarkdup \\ + -j ${task.cpus} \\ + $input \\ + ${prefix}.${suffix} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pbmarkdup: \$(echo \$(pbmarkdup --version 2>&1) | awk 'BEGIN{FS=" "}{print \$2}') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = input.getExtension() + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pbmarkdup: \$(echo \$(pbmarkdup --version 2>&1) | awk 'BEGIN{FS=" "}{print \$2}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/pbmarkdup/meta.yml b/modules/nf-core/pbmarkdup/meta.yml new file mode 100644 index 000000000000..3dcde7b27b99 --- /dev/null +++ b/modules/nf-core/pbmarkdup/meta.yml @@ -0,0 +1,58 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "pbmarkdup" +description: | + Takes one or multiple sequencing chips of an amplified library as HiFi reads and marks or removes + duplicates. +keywords: + - markdup + - bam + - fastq + - fasta +tools: + - pbmarkdup: + description: | + pbmarkdup identifies and marks duplicate reads in PacBio HiFi (CCS) data. It clusters + highly similar CCS reads to detect PCR duplicates and flags them in the BAM output + (duplicate bit 0x400), optionally removing duplicates. 
+ homepage: https://github.com/PacificBiosciences/pbmarkdup + documentation: https://github.com/PacificBiosciences/pbmarkdup + licence: ["BSD-3-Clause"] + identifier: biotools:pbmarkdup +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - input: + type: file + description: | + Sequencing reads in BAM, FASTQ, or FASTA format. + pattern: "*.{bam,f*a,/.*f.*\\.gz/}" + ontologies: + - edam: http://edamontology.org/format_2546 # FASTA-like +output: + markduped: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.${suffix}": + type: file + description: | + Markduplicated sequencing reads in the same format as the input file. + pattern: "*.{bam,f*a,/.*f.*\\.gz/}" + ontologies: + - edam: http://edamontology.org/format_3003 + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@sainsachiko" +maintainers: + - "@sainsachiko" \ No newline at end of file diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test b/modules/nf-core/pbmarkdup/tests/main.nf.test new file mode 100644 index 000000000000..fe9a17389be6 --- /dev/null +++ b/modules/nf-core/pbmarkdup/tests/main.nf.test @@ -0,0 +1,91 @@ + +nextflow_process { + + name "Test Process PBMARKDUP" + script "../main.nf" + process "PBMARKDUP" + + tag "modules" + tag "modules_sangertol" + tag "pbmarkdup" + + config './nextflow.config' + + test("deilephila porcellus - fasta") { + + when { + + process { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("acropora 
cervicornis - bam") { + + when { + + process { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.resubset.bam', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("deilephila porcellus - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test.snap b/modules/nf-core/pbmarkdup/tests/main.nf.test.snap new file mode 100644 index 000000000000..c719f3f5abea --- /dev/null +++ b/modules/nf-core/pbmarkdup/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "acropora cervicornis - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,63490fcddb44783b761fab8e54123d73" + ] + ], + "1": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ], + "markduped": [ + [ + { + "id": "test" + }, + "test.bam:md5,63490fcddb44783b761fab8e54123d73" + ] + ], + "versions": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-25T13:57:29.646511931" + }, + "deilephila porcellus - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ], + "markduped": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-25T13:43:50.757401556" + }, + "deilephila porcellus - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fa:md5,087cee5291f8d728a62b91765b64af35" + ] + ], + "1": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ], + "markduped": [ + [ + { + "id": "test" + }, + "test.fa:md5,087cee5291f8d728a62b91765b64af35" + ] + ], + "versions": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-25T13:43:28.330988318" + } +} \ No newline at end of file diff --git a/modules/nf-core/pbmarkdup/tests/nextflow.config b/modules/nf-core/pbmarkdup/tests/nextflow.config new file mode 100644 index 000000000000..033dac0673a2 --- /dev/null +++ b/modules/nf-core/pbmarkdup/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: PBMARKDUP { + ext.args = { "--clobber" } + } +} From 9239d22fd83eb96560caef2009420b7f44b3bf6e Mon Sep 17 00:00:00 2001 From: sainsachiko Date: Wed, 26 Nov 2025 02:01:34 +0700 Subject: [PATCH 2/8] Fix linting --- modules/nf-core/pbmarkdup/environment.yml | 2 +- modules/nf-core/pbmarkdup/main.nf | 12 ++++++------ modules/nf-core/pbmarkdup/meta.yml | 18 +++++++++++------- modules/nf-core/pbmarkdup/tests/main.nf.test | 2 +- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/modules/nf-core/pbmarkdup/environment.yml b/modules/nf-core/pbmarkdup/environment.yml index 4484c5763827..7e5a3099a716 100644 --- a/modules/nf-core/pbmarkdup/environment.yml +++ b/modules/nf-core/pbmarkdup/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::pbmarkdup==1.2.0 \ No newline at end of file + - bioconda::pbmarkdup=1.2.0 diff --git a/modules/nf-core/pbmarkdup/main.nf b/modules/nf-core/pbmarkdup/main.nf index e6640bd1e900..c9f351952a98 100644 --- 
a/modules/nf-core/pbmarkdup/main.nf +++ b/modules/nf-core/pbmarkdup/main.nf @@ -11,8 +11,8 @@ process PBMARKDUP { tuple val(meta), path(input) output: - tuple val(meta), path("*.{bam,f*a,/.*f.*\\.gz/}") , emit: markduped - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.${suffix}"), emit: markduped + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,8 +20,8 @@ process PBMARKDUP { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = input.getExtension() + prefix = task.ext.prefix ?: "${meta.id}" + suffix = input.getExtension() """ pbmarkdup \\ @@ -37,8 +37,8 @@ process PBMARKDUP { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = input.getExtension() + prefix = task.ext.prefix ?: "${meta.id}" + suffix = input.getExtension() """ touch ${prefix}.${suffix} diff --git a/modules/nf-core/pbmarkdup/meta.yml b/modules/nf-core/pbmarkdup/meta.yml index 3dcde7b27b99..7762b0d78b14 100644 --- a/modules/nf-core/pbmarkdup/meta.yml +++ b/modules/nf-core/pbmarkdup/meta.yml @@ -1,7 +1,7 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "pbmarkdup" description: | - Takes one or multiple sequencing chips of an amplified library as HiFi reads and marks or removes + Takes one or multiple sequencing chips of an amplified library as HiFi reads and marks or removes duplicates. keywords: - markdup @@ -11,9 +11,9 @@ keywords: tools: - pbmarkdup: description: | - pbmarkdup identifies and marks duplicate reads in PacBio HiFi (CCS) data. It clusters - highly similar CCS reads to detect PCR duplicates and flags them in the BAM output - (duplicate bit 0x400), optionally removing duplicates. + pbmarkdup identifies and marks duplicate reads in PacBio HiFi (CCS) data. 
It clusters + highly similar CCS reads to detect PCR duplicates and flags them in the BAM output + (duplicate bit 0x400), optionally removing duplicates. homepage: https://github.com/PacificBiosciences/pbmarkdup documentation: https://github.com/PacificBiosciences/pbmarkdup licence: ["BSD-3-Clause"] @@ -31,6 +31,8 @@ input: pattern: "*.{bam,f*a,/.*f.*\\.gz/}" ontologies: - edam: http://edamontology.org/format_2546 # FASTA-like + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: "http://edamontology.org/format_2572" # BAM output: markduped: - - meta: @@ -38,13 +40,15 @@ output: description: | Groovy Map containing sample information e.g. `[ id:'sample1' ]` - - "*.${suffix}": + - ${prefix}.${suffix}: type: file description: | Markduplicated sequencing reads in the same format as the input file. pattern: "*.{bam,f*a,/.*f.*\\.gz/}" ontologies: - - edam: http://edamontology.org/format_3003 + - edam: http://edamontology.org/format_2546 # FASTA-like + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: "http://edamontology.org/format_2572" # BAM versions: - versions.yml: type: file @@ -55,4 +59,4 @@ output: authors: - "@sainsachiko" maintainers: - - "@sainsachiko" \ No newline at end of file + - "@sainsachiko" diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test b/modules/nf-core/pbmarkdup/tests/main.nf.test index fe9a17389be6..cdbd594d4ca3 100644 --- a/modules/nf-core/pbmarkdup/tests/main.nf.test +++ b/modules/nf-core/pbmarkdup/tests/main.nf.test @@ -6,7 +6,7 @@ nextflow_process { process "PBMARKDUP" tag "modules" - tag "modules_sangertol" + tag "modules_nfcore" tag "pbmarkdup" config './nextflow.config' From 2a75dc8a8738e50eb0f2c09d58d59ab681952611 Mon Sep 17 00:00:00 2001 From: sainsachiko Date: Wed, 26 Nov 2025 16:25:43 +0700 Subject: [PATCH 3/8] Update path to test data --- modules/nf-core/pbmarkdup/tests/main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test 
b/modules/nf-core/pbmarkdup/tests/main.nf.test index cdbd594d4ca3..6ad9a1fa3311 100644 --- a/modules/nf-core/pbmarkdup/tests/main.nf.test +++ b/modules/nf-core/pbmarkdup/tests/main.nf.test @@ -45,7 +45,7 @@ nextflow_process { input[0] = Channel.of( [ [ id:'test' ], // meta map - file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.resubset.bam', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.resubset.bam', checkIfExists: true) ] ) """ From 4d913d69316afd6a6c379c0bccc723a68ed16d30 Mon Sep 17 00:00:00 2001 From: sainsachiko Date: Thu, 27 Nov 2025 22:09:51 +0700 Subject: [PATCH 4/8] Update with code review (--dup-file, log, check file name collisions) --- modules/nf-core/pbmarkdup/main.nf | 37 ++++- modules/nf-core/pbmarkdup/tests/main.nf.test | 46 ++++++- .../nf-core/pbmarkdup/tests/main.nf.test.snap | 129 ++++++++++++++++-- .../nf-core/pbmarkdup/tests/nextflow.config | 4 +- 4 files changed, 196 insertions(+), 20 deletions(-) diff --git a/modules/nf-core/pbmarkdup/main.nf b/modules/nf-core/pbmarkdup/main.nf index c9f351952a98..287e887be5eb 100644 --- a/modules/nf-core/pbmarkdup/main.nf +++ b/modules/nf-core/pbmarkdup/main.nf @@ -12,23 +12,42 @@ process PBMARKDUP { output: tuple val(meta), path("${prefix}.${suffix}"), emit: markduped + tuple val(meta), path("${dupfile_name}") , emit: dupfile , optional: true + tuple val(meta), path("*.pbmarkdup.log") , emit: logs , optional: true path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + suffix = input[0].getExtension() // To allow multiple input types + dupfile_name = args.contains('--dup-file') ? 
(args =~ /--dup-file\s+(\S+)/)[0][1] : '' + def log_args = args.contains('--log-level') ? " > ${prefix}.pbmarkdup.log" : '' + def file_list = input.collect { it.getName() }.join(' ') - prefix = task.ext.prefix ?: "${meta.id}" - suffix = input.getExtension() + // Check file name collisions between input, output, and duplicate file + if (file_list.contains("${prefix}.${suffix}")) + error """Output file `${prefix}.${suffix}` conflicts with an input file. + Please change the output `$prefix` or input file names.""" + if (dupfile_name) { + if (file_list.contains(dupfile_name)) + error """Duplicate file `$dupfile_name` conflicts with an input file. + Please change the duplicate file name `$dupfile_name` or input file names.""" + + if (dupfile_name == "${prefix}.${suffix}") + error """Duplicate file `$dupfile_name` cannot be the same as the output file name. + Please change the duplicate file name `$dupfile_name` or output prefix `$prefix`.""" + } """ pbmarkdup \\ -j ${task.cpus} \\ - $input \\ + ${file_list} \\ ${prefix}.${suffix} \\ - $args + $args \\ + ${log_args} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -37,8 +56,12 @@ process PBMARKDUP { """ stub: - prefix = task.ext.prefix ?: "${meta.id}" - suffix = input.getExtension() + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + suffix = input[0].getExtension() // To allow multiple input types + dupfile_name = args.contains('--dup-file') ? (args =~ /--dup-file\s+(\S+)/)[0][1] : '' + def log_args = args.contains('--log-level') ? 
" > ${prefix}.pbmarkdup.log" : '' + def file_list = input.collect { it.getName() }.join(' ') """ touch ${prefix}.${suffix} diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test b/modules/nf-core/pbmarkdup/tests/main.nf.test index 6ad9a1fa3311..34d19a058837 100644 --- a/modules/nf-core/pbmarkdup/tests/main.nf.test +++ b/modules/nf-core/pbmarkdup/tests/main.nf.test @@ -15,6 +15,10 @@ nextflow_process { when { + params { + pbmarkdup_args = "--clobber --dup-file output.dup.fasta" + } + process { """ input[0] = Channel.of( @@ -36,16 +40,51 @@ nextflow_process { } - test("acropora cervicornis - bam") { + test("acropora cervicornis - bam - multiple tests with dupfile and log") { + when { + + params { + pbmarkdup_args = "--clobber --dup-file ${prefix}.dup.bam --log-level INFO" + } + + process { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + [ file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true), + file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) + ] + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + test("acropora cervicornis - bam - multiple tests remove duplicates") { when { + params { + pbmarkdup_args = "--clobber --rmdup" + } + process { """ input[0] = Channel.of( [ [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.resubset.bam', checkIfExists: true) + [ file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true), + 
file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) + ] ] ) """ @@ -66,6 +105,9 @@ nextflow_process { options "-stub" when { + params { + pbmarkdup_args = "" + } process { """ diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test.snap b/modules/nf-core/pbmarkdup/tests/main.nf.test.snap index c719f3f5abea..31ae76263b3a 100644 --- a/modules/nf-core/pbmarkdup/tests/main.nf.test.snap +++ b/modules/nf-core/pbmarkdup/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "acropora cervicornis - bam": { + "acropora cervicornis - bam - multiple tests remove duplicates": { "content": [ { "0": [ @@ -7,18 +7,95 @@ { "id": "test" }, - "test.bam:md5,63490fcddb44783b761fab8e54123d73" + "test.bam:md5,86e22a794d904cc48cb3758a03883ba1" ] ], "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ], + "dupfile": [ + + ], + "logs": [ + + ], + "markduped": [ + [ + { + "id": "test" + }, + "test.bam:md5,86e22a794d904cc48cb3758a03883ba1" + ] + ], + "versions": [ + "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-11-27T22:01:01.517309" + }, + "acropora cervicornis - bam - multiple tests with dupfile and log": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,86e22a794d904cc48cb3758a03883ba1" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "null.dup.bam:md5,3b74225ad5f7e9e1cbafc45132ad82fb" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.pbmarkdup.log:md5,99987a1331d01b59aa3b5ccd1c787906" + ] + ], + "3": [ "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" ], + "dupfile": [ + [ + { + "id": "test" + }, + "null.dup.bam:md5,3b74225ad5f7e9e1cbafc45132ad82fb" + ] + ], + "logs": [ + [ + { + "id": "test" + }, + "test.pbmarkdup.log:md5,99987a1331d01b59aa3b5ccd1c787906" + ] + ], "markduped": [ [ 
{ "id": "test" }, - "test.bam:md5,63490fcddb44783b761fab8e54123d73" + "test.bam:md5,86e22a794d904cc48cb3758a03883ba1" ] ], "versions": [ @@ -28,9 +105,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.10.0" + "nextflow": "25.04.2" }, - "timestamp": "2025-11-25T13:57:29.646511931" + "timestamp": "2025-11-27T22:00:33.750177" }, "deilephila porcellus - stub": { "content": [ @@ -44,7 +121,19 @@ ] ], "1": [ + + ], + "2": [ + + ], + "3": [ "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ], + "dupfile": [ + + ], + "logs": [ + ], "markduped": [ [ @@ -61,9 +150,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.10.0" + "nextflow": "25.04.2" }, - "timestamp": "2025-11-25T13:43:50.757401556" + "timestamp": "2025-11-27T22:01:14.242019" }, "deilephila porcellus - fasta": { "content": [ @@ -77,7 +166,29 @@ ] ], "1": [ + [ + { + "id": "test" + }, + "output.dup.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" + ], + "dupfile": [ + [ + { + "id": "test" + }, + "output.dup.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "logs": [ + ], "markduped": [ [ @@ -94,8 +205,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.10.0" + "nextflow": "25.04.2" }, - "timestamp": "2025-11-25T13:43:28.330988318" + "timestamp": "2025-11-27T22:00:02.071951" } } \ No newline at end of file diff --git a/modules/nf-core/pbmarkdup/tests/nextflow.config b/modules/nf-core/pbmarkdup/tests/nextflow.config index 033dac0673a2..38bce1a3ac67 100644 --- a/modules/nf-core/pbmarkdup/tests/nextflow.config +++ b/modules/nf-core/pbmarkdup/tests/nextflow.config @@ -1,5 +1,5 @@ process { withName: PBMARKDUP { - ext.args = { "--clobber" } + ext.args = { "${params.pbmarkdup_args}" } } -} +} \ No newline at end of file From 1271655db67e4cffc9d87002a2e6f696c87b0c9c Mon Sep 17 00:00:00 2001 From: sainsachiko Date: Thu, 27 Nov 2025 22:50:58 +0700 Subject: [PATCH 5/8] Fix linting --- modules/nf-core/pbmarkdup/main.nf | 8 
++--- modules/nf-core/pbmarkdup/meta.yml | 27 +++++++++++++++++ modules/nf-core/pbmarkdup/tests/main.nf.test | 6 ++-- .../nf-core/pbmarkdup/tests/main.nf.test.snap | 30 +++++++------------ .../nf-core/pbmarkdup/tests/nextflow.config | 2 +- 5 files changed, 45 insertions(+), 28 deletions(-) diff --git a/modules/nf-core/pbmarkdup/main.nf b/modules/nf-core/pbmarkdup/main.nf index 287e887be5eb..20da004d21b5 100644 --- a/modules/nf-core/pbmarkdup/main.nf +++ b/modules/nf-core/pbmarkdup/main.nf @@ -13,7 +13,7 @@ process PBMARKDUP { output: tuple val(meta), path("${prefix}.${suffix}"), emit: markduped tuple val(meta), path("${dupfile_name}") , emit: dupfile , optional: true - tuple val(meta), path("*.pbmarkdup.log") , emit: logs , optional: true + tuple val(meta), path("*.pbmarkdup.log") , emit: log , optional: true path "versions.yml" , emit: versions when: @@ -28,15 +28,15 @@ process PBMARKDUP { def file_list = input.collect { it.getName() }.join(' ') // Check file name collisions between input, output, and duplicate file - if (file_list.contains("${prefix}.${suffix}")) + if (file_list.tokenize().contains("${prefix}.${suffix}".toString())) // exact name match, not substring error """Output file `${prefix}.${suffix}` conflicts with an input file. Please change the output `$prefix` or input file names.""" if (dupfile_name) { - if (file_list.contains(dupfile_name)) + if (file_list.tokenize().contains(dupfile_name)) error """Duplicate file `$dupfile_name` conflicts with an input file. Please change the duplicate file name `$dupfile_name` or input file names.""" - if (dupfile_name == "${prefix}.${suffix}") + if (dupfile_name == "${prefix}.${suffix}") error """Duplicate file `$dupfile_name` cannot be the same as the output file name. 
Please change the duplicate file name `$dupfile_name` or output prefix `$prefix`.""" } diff --git a/modules/nf-core/pbmarkdup/meta.yml b/modules/nf-core/pbmarkdup/meta.yml index 7762b0d78b14..9ec66e32929c 100644 --- a/modules/nf-core/pbmarkdup/meta.yml +++ b/modules/nf-core/pbmarkdup/meta.yml @@ -49,6 +49,33 @@ output: - edam: http://edamontology.org/format_2546 # FASTA-like - edam: "http://edamontology.org/format_1930" # FASTQ - edam: "http://edamontology.org/format_2572" # BAM + dupfile: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${dupfile_name}: + type: file + description: | + (Optional) File listing duplicate reads (Specify by --dup-file). + pattern: "*.{bam,f*a,/.*f.*\\.gz/}" + ontologies: + - edam: http://edamontology.org/format_2546 # FASTA-like + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: "http://edamontology.org/format_2572" # BAM + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.pbmarkdup.log": + type: file + description: | + Log file generated by pbmarkdup (if --log-level is specified). 
+ pattern: "*.pbmarkdup.log" + ontologies: [] versions: - versions.yml: type: file diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test b/modules/nf-core/pbmarkdup/tests/main.nf.test index 34d19a058837..70aae1b97b0b 100644 --- a/modules/nf-core/pbmarkdup/tests/main.nf.test +++ b/modules/nf-core/pbmarkdup/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { when { params { - pbmarkdup_args = "--clobber --dup-file output.dup.fasta" + pbmarkdup_args = "--clobber" } process { @@ -53,7 +53,7 @@ nextflow_process { [ [ id:'test' ], // meta map [ file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true), - file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) + file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) ] ] ) @@ -83,7 +83,7 @@ nextflow_process { [ [ id:'test' ], // meta map [ file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true), - file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) + file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) ] ] ) diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test.snap b/modules/nf-core/pbmarkdup/tests/main.nf.test.snap index 31ae76263b3a..705b7dd4f2ac 100644 --- 
a/modules/nf-core/pbmarkdup/tests/main.nf.test.snap +++ b/modules/nf-core/pbmarkdup/tests/main.nf.test.snap @@ -22,7 +22,7 @@ "dupfile": [ ], - "logs": [ + "log": [ ], "markduped": [ @@ -42,7 +42,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-11-27T22:01:01.517309" + "timestamp": "2025-11-27T22:25:53.428359" }, "acropora cervicornis - bam - multiple tests with dupfile and log": { "content": [ @@ -82,7 +82,7 @@ "null.dup.bam:md5,3b74225ad5f7e9e1cbafc45132ad82fb" ] ], - "logs": [ + "log": [ [ { "id": "test" @@ -107,7 +107,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-11-27T22:00:33.750177" + "timestamp": "2025-11-27T22:25:23.374664" }, "deilephila porcellus - stub": { "content": [ @@ -132,7 +132,7 @@ "dupfile": [ ], - "logs": [ + "log": [ ], "markduped": [ @@ -152,7 +152,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-11-27T22:01:14.242019" + "timestamp": "2025-11-27T22:26:16.491708" }, "deilephila porcellus - fasta": { "content": [ @@ -166,12 +166,7 @@ ] ], "1": [ - [ - { - "id": "test" - }, - "output.dup.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "2": [ @@ -180,14 +175,9 @@ "versions.yml:md5,832e36b56615fb29a94b16e4db32b8db" ], "dupfile": [ - [ - { - "id": "test" - }, - "output.dup.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], - "logs": [ + "log": [ ], "markduped": [ @@ -207,6 +197,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-11-27T22:00:02.071951" + "timestamp": "2025-11-27T22:47:33.595865" } } \ No newline at end of file diff --git a/modules/nf-core/pbmarkdup/tests/nextflow.config b/modules/nf-core/pbmarkdup/tests/nextflow.config index 38bce1a3ac67..dc9b092f0130 100644 --- a/modules/nf-core/pbmarkdup/tests/nextflow.config +++ b/modules/nf-core/pbmarkdup/tests/nextflow.config @@ -2,4 +2,4 @@ process { withName: PBMARKDUP { ext.args = { "${params.pbmarkdup_args}" } } -} \ No newline at end of file +} From 2a92fde49d9157a9e0ebb9f82bcb3fcb80606e04 Mon 
Sep 17 00:00:00 2001 From: sainsachiko Date: Thu, 27 Nov 2025 23:24:17 +0700 Subject: [PATCH 6/8] Update path to test data --- modules/nf-core/pbmarkdup/tests/main.nf.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/pbmarkdup/tests/main.nf.test b/modules/nf-core/pbmarkdup/tests/main.nf.test index 70aae1b97b0b..dbe78c3517cc 100644 --- a/modules/nf-core/pbmarkdup/tests/main.nf.test +++ b/modules/nf-core/pbmarkdup/tests/main.nf.test @@ -52,8 +52,8 @@ nextflow_process { input[0] = Channel.of( [ [ id:'test' ], // meta map - [ file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true), - file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) + [ file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) ] ] ) @@ -82,8 +82,8 @@ nextflow_process { input[0] = Channel.of( [ [ id:'test' ], // meta map - [ file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true), - file('https://github.com/sainsachiko/test-datasets/raw/refs/heads/modules/data/genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) + [ file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true) ] ] ) From 2d42b4c1b2bf7d74ab5935e14945d2c8a869f4f9 Mon Sep 17 00:00:00 2001 From: Hanh Hoang <134130358+sainsachiko@users.noreply.github.com> Date: Mon, 1 Dec 2025 14:15:24 +0700 Subject: [PATCH 7/8] Update modules/nf-core/pbmarkdup/meta.yml --- modules/nf-core/pbmarkdup/meta.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/nf-core/pbmarkdup/meta.yml b/modules/nf-core/pbmarkdup/meta.yml index 9ec66e32929c..fc12076aa073 100644 --- a/modules/nf-core/pbmarkdup/meta.yml +++ b/modules/nf-core/pbmarkdup/meta.yml @@ -12,7 +12,8 @@ tools: - pbmarkdup: description: | pbmarkdup identifies and marks duplicate reads in PacBio HiFi (CCS) data. It clusters - highly similar CCS reads to detect PCR duplicates and flags them in the BAM output + highly similar CCS reads to detect PCR duplicates and flags them in the output files + (BAM, FASTQ, FASTA) (duplicate bit 0x400), optionally removing duplicates. homepage: https://github.com/PacificBiosciences/pbmarkdup documentation: https://github.com/PacificBiosciences/pbmarkdup From dc0047b4df9467f3e8d6b8bc4f711623df97c605 Mon Sep 17 00:00:00 2001 From: Hanh Hoang <134130358+sainsachiko@users.noreply.github.com> Date: Mon, 1 Dec 2025 14:21:49 +0700 Subject: [PATCH 8/8] Fix linting --- modules/nf-core/pbmarkdup/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/pbmarkdup/meta.yml b/modules/nf-core/pbmarkdup/meta.yml index fc12076aa073..59a1de054258 100644 --- a/modules/nf-core/pbmarkdup/meta.yml +++ b/modules/nf-core/pbmarkdup/meta.yml @@ -12,7 +12,7 @@ tools: - pbmarkdup: description: | pbmarkdup identifies and marks duplicate reads in PacBio HiFi (CCS) data. 
It clusters - highly similar CCS reads to detect PCR duplicates and flags them in the output files + highly similar CCS reads to detect PCR duplicates and flags them in the output files (BAM, FASTQ, FASTA) (duplicate bit 0x400), optionally removing duplicates. homepage: https://github.com/PacificBiosciences/pbmarkdup