Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/pbmarkdup/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::pbmarkdup=1.2.0
73 changes: 73 additions & 0 deletions modules/nf-core/pbmarkdup/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// PBMARKDUP: run `pbmarkdup` on one or more read files and emit the processed reads.
//
// Inputs
//   meta  - Groovy map with sample information; `meta.id` is the default output prefix.
//   input - one or more read files; the extension of the first file is reused for the output.
//
// Outputs
//   markduped - `${prefix}.${suffix}`, the file written by pbmarkdup.
//   dupfile   - (optional) the file named after `--dup-file` in `task.ext.args`.
//   log       - (optional) `${prefix}.pbmarkdup.log`, written when `--log-level` is in `task.ext.args`.
//   versions  - `versions.yml` with the pbmarkdup version.
//
// Fails early (via `error`) when the output or duplicate file name equals an input file name, or
// when the duplicate file name equals the output file name.
process PBMARKDUP {
    tag "$meta.id"
    label "process_high"

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pbmarkdup:1.2.0--h9ee0642_0' :
        'biocontainers/pbmarkdup:1.2.0--h9ee0642_0' }"

    input:
    tuple val(meta), path(input)

    output:
    tuple val(meta), path("${prefix}.${suffix}"), emit: markduped
    tuple val(meta), path("${dupfile_name}")    , emit: dupfile  , optional: true
    tuple val(meta), path("*.pbmarkdup.log")    , emit: log      , optional: true
    path "versions.yml"                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args        = task.ext.args ?: ''
    // `prefix`, `suffix` and `dupfile_name` are deliberately NOT declared with `def`:
    // the output block above refers to them.
    prefix          = task.ext.prefix ?: "${meta.id}"
    suffix          = input[0].getExtension() // To allow multiple input types
    // Accept both `--dup-file NAME` and `--dup-file=NAME`; fall back to '' instead of throwing
    // when the flag is present without a parsable value.
    def dup_match   = (args =~ /--dup-file(?:\s+|=)(\S+)/)
    dupfile_name    = dup_match.find() ? dup_match.group(1) : ''
    def log_args    = args.contains('--log-level') ? " > ${prefix}.pbmarkdup.log" : ''
    def input_names = input.collect { it.getName() }
    def file_list   = input_names.join(' ')
    // Plain String so that List.contains()/== compare by value rather than GString identity.
    def output_name = "${prefix}.${suffix}".toString()

    // Check file name collisions between input, output, and duplicate file.
    // Names are compared exactly: a substring test on the joined list would wrongly reject
    // e.g. input `mytest.bam` when the output is `test.bam`.
    if (input_names.contains(output_name)) {
        error("Output file `${output_name}` conflicts with an input file.\n" +
            "Please change the output `${prefix}` or input file names.")
    }
    if (dupfile_name) {
        if (input_names.contains(dupfile_name)) {
            error("Duplicate file `${dupfile_name}` conflicts with an input file.\n" +
                "Please change the duplicate file name `${dupfile_name}` or input file names.")
        }
        if (dupfile_name == output_name) {
            error("Duplicate file `${dupfile_name}` cannot be the same as the output file name.\n" +
                "Please change the duplicate file name `${dupfile_name}` or output prefix `${prefix}`.")
        }
    }

    """
    pbmarkdup \\
        -j ${task.cpus} \\
        ${file_list} \\
        ${output_name} \\
        $args \\
        ${log_args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pbmarkdup: \$(echo \$(pbmarkdup --version 2>&1) | awk 'BEGIN{FS=" "}{print \$2}')
    END_VERSIONS
    """

    stub:
    def args          = task.ext.args ?: ''
    prefix            = task.ext.prefix ?: "${meta.id}"
    suffix            = input[0].getExtension() // To allow multiple input types
    def dup_match     = (args =~ /--dup-file(?:\s+|=)(\S+)/)
    dupfile_name      = dup_match.find() ? dup_match.group(1) : ''
    // Mirror the optional outputs of the real run so downstream stub runs see the same channels.
    def touch_dupfile = dupfile_name ? "touch ${dupfile_name}" : ''
    def touch_log     = args.contains('--log-level') ? "touch ${prefix}.pbmarkdup.log" : ''
    """
    touch ${prefix}.${suffix}
    ${touch_dupfile}
    ${touch_log}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pbmarkdup: \$(echo \$(pbmarkdup --version 2>&1) | awk 'BEGIN{FS=" "}{print \$2}')
    END_VERSIONS
    """
}
90 changes: 90 additions & 0 deletions modules/nf-core/pbmarkdup/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "pbmarkdup"
description: |
Takes one or multiple sequencing chips of an amplified library as HiFi reads and marks or removes
duplicates.
keywords:
- markdup
- bam
- fastq
- fasta
tools:
- pbmarkdup:
description: |
pbmarkdup identifies and marks duplicate reads in PacBio HiFi (CCS) data. It clusters
highly similar CCS reads to detect PCR duplicates and flags them in the output files
(BAM, FASTQ, FASTA) (duplicate bit 0x400), optionally removing duplicates.
homepage: https://github.com/PacificBiosciences/pbmarkdup
documentation: https://github.com/PacificBiosciences/pbmarkdup
licence: ["BSD-3-Clause"]
identifier: biotools:pbmarkdup
input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1' ]`
- input:
type: file
description: |
Sequencing reads in BAM, FASTQ, or FASTA format.
pattern: "*.{bam,f*a,/.*f.*\\.gz/}"
ontologies:
- edam: http://edamontology.org/format_2546 # FASTA-like
- edam: "http://edamontology.org/format_1930" # FASTQ
- edam: "http://edamontology.org/format_2572" # BAM
output:
markduped:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1' ]`
- ${prefix}.${suffix}:
type: file
description: |
Duplicate-marked sequencing reads in the same format as the input file.
pattern: "*.{bam,f*a,/.*f.*\\.gz/}"
ontologies:
- edam: http://edamontology.org/format_2546 # FASTA-like
- edam: "http://edamontology.org/format_1930" # FASTQ
- edam: "http://edamontology.org/format_2572" # BAM
dupfile:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1' ]`
- ${dupfile_name}:
type: file
description: |
(Optional) File listing duplicate reads (specified with `--dup-file`).
pattern: "*.{bam,f*a,/.*f.*\\.gz/}"
ontologies:
- edam: http://edamontology.org/format_2546 # FASTA-like
- edam: "http://edamontology.org/format_1930" # FASTQ
- edam: "http://edamontology.org/format_2572" # BAM
log:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1' ]`
- "*.pbmarkdup.log":
type: file
description: |
Log file generated by pbmarkdup (if --log-level is specified).
pattern: "*.pbmarkdup.log"
ontologies: []
versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
ontologies:
- edam: http://edamontology.org/format_3750 # YAML
authors:
- "@sainsachiko"
maintainers:
- "@sainsachiko"
133 changes: 133 additions & 0 deletions modules/nf-core/pbmarkdup/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@

nextflow_process {

name "Test Process PBMARKDUP"
script "../main.nf"
process "PBMARKDUP"

tag "modules"
tag "modules_nfcore"
tag "pbmarkdup"

config './nextflow.config'

// Runs PBMARKDUP on a single FASTA read file and compares all process outputs to the stored snapshot.
test("deilephila porcellus - fasta") {

when {

// NOTE(review): `pbmarkdup_args` is presumably forwarded to the process as `ext.args`
// by ./nextflow.config (not visible here) — confirm.
params {
pbmarkdup_args = "--clobber"
}

// The string below is evaluated as the process input definition: one [ meta, file ] tuple.
process {
"""
input[0] = Channel.of(
[
[ id:'test' ], // meta map
file(params.modules_testdata_base_path + 'genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa', checkIfExists: true)
]
)
"""
}
}

// The process must succeed and its full output must match the snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Runs PBMARKDUP on two BAM inputs while requesting a duplicate-read file (`--dup-file`) and a
// log (`--log-level`), then compares all process outputs to the stored snapshot.
test("acropora cervicornis - bam - multiple tests with dupfile and log") {
    when {

        params {
            // Use a literal duplicate-file name. `prefix` is only assigned inside the PBMARKDUP
            // process, so interpolating `${prefix}` in this Groovy string cannot yield the
            // process prefix. NOTE(review): the stored snapshot records the dup-file name and
            // must be regenerated after this change.
            pbmarkdup_args = "--clobber --dup-file test.dup.bam --log-level INFO"
        }

        // The string below is evaluated as the process input definition:
        // one [ meta, [ bam1, bam2 ] ] tuple.
        process {
            """
            input[0] = Channel.of(
                [
                    [ id:'test' ], // meta map
                    [ file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true),
                      file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true)
                    ]
                ]
            )
            """
        }
    }

    // The process must succeed and its full output must match the snapshot.
    then {
        assertAll(
            { assert process.success },
            { assert snapshot(process.out).match() }
        )
    }

}

// Runs PBMARKDUP on two BAM inputs with `--rmdup` and compares all process outputs to the stored snapshot.
test("acropora cervicornis - bam - multiple tests remove duplicates") {
when {

// NOTE(review): `pbmarkdup_args` is presumably forwarded to the process as `ext.args`
// by ./nextflow.config (not visible here) — confirm.
params {
pbmarkdup_args = "--clobber --rmdup"
}

// The string below is evaluated as the process input definition: one [ meta, [ bam1, bam2 ] ] tuple.
process {
"""
input[0] = Channel.of(
[
[ id:'test' ], // meta map
[ file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.1.bam', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/eukaryotes/acropora_cervicornis/m84093_241116_151316_s2.hifi_reads.bc2028.subset.2.bam', checkIfExists: true)
]
]
)
"""
}
}

// The process must succeed and its full output must match the snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Runs PBMARKDUP in stub mode on a single FASTA read file and compares all process outputs to the stored snapshot.
test("deilephila porcellus - stub") {

// Run the process' `stub:` section instead of its `script:` section.
options "-stub"

when {
// No extra pbmarkdup arguments are passed for the stub run.
params {
pbmarkdup_args = ""
}

// The string below is evaluated as the process input definition: one [ meta, file ] tuple.
process {
"""
input[0] = Channel.of(
[
[ id:'test' ], // meta map
file(params.modules_testdata_base_path + 'genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa', checkIfExists: true)
]
)
"""
}
}

// The process must succeed and its full output must match the snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
Loading
Loading