Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/nf-core/ribodetector/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- "bioconda::ribodetector=0.3.1"
- "bioconda::ribodetector=0.3.2"
18 changes: 4 additions & 14 deletions modules/nf-core/ribodetector/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ process RIBODETECTOR {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ribodetector:0.3.1--pyhdfd78af_0':
'biocontainers/ribodetector:0.3.1--pyhdfd78af_0' }"
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/4d/4de8fe74d21198e6fc8218cb3209d929b3d7dab750678501b096b0ccc324307b/data' :
'community.wave.seqera.io/library/ribodetector:0.3.2--cbe1c77fa14eeb53' }"

input:
tuple val(meta), path(fastq)
Expand All @@ -14,7 +14,7 @@ process RIBODETECTOR {
output:
tuple val(meta), path("*.nonrna*.fastq.gz"), emit: fastq
tuple val(meta), path("*.log") , emit: log
path "versions.yml" , emit: versions
tuple val("${task.process}"), val('ribodetector'), eval('ribodetector --version | sed "s/ribodetector //"'), emit: versions_ribodetector, topic: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -35,11 +35,6 @@ process RIBODETECTOR {
--log ${prefix}.log \\
${ribodetector_mem} \\
${args}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
ribodetector: \$(ribodetector --version | sed 's/ribodetector //g')
END_VERSIONS
"""

stub:
Expand All @@ -50,12 +45,7 @@ process RIBODETECTOR {
echo $args

echo | gzip > ${prefix}.nonrna.1.fastq.gz
echo | gzip > ${prefix}.nonrna.2.fastq.gz
echo | gzip > ${prefix}.nonrna.2.fastq.gz
touch ${prefix}.log

cat <<-END_VERSIONS > versions.yml
"${task.process}":
ribodetector: \$(ribodetector --version | sed 's/ribodetector //g')
END_VERSIONS
"""
}
37 changes: 25 additions & 12 deletions modules/nf-core/ribodetector/meta.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "ribodetector"
description: Accurate and rapid RiboRNA sequences Detector based on deep
learning
description: Accurate and rapid RiboRNA sequences Detector based on deep learning
keywords:
- RNA
- RNAseq
Expand All @@ -16,10 +15,10 @@ keywords:
tools:
- ribodetector:
description: Accurate and rapid RiboRNA sequences detector based on deep learning.
RiboDetector uses a deep learning approach to identify rRNA sequences in
ribosome profiling (Ribo-seq) data. It can be used to filter out rRNA reads
from Ribo-seq datasets, improving the quality of downstream analyses. As of version
0.3.1, Ribodetector doesn't support setting a random seed, so results may not be fully
RiboDetector uses a deep learning approach to identify rRNA sequences in ribosome
profiling (Ribo-seq) data. It can be used to filter out rRNA reads from Ribo-seq
datasets, improving the quality of downstream analyses. As of version 0.3.1,
Ribodetector doesn't support setting a random seed, so results may not be fully
deterministic across runs.
homepage: "https://github.com/hzi-bifo/RiboDetector"
documentation: "https://github.com/hzi-bifo/RiboDetector"
Expand Down Expand Up @@ -67,13 +66,27 @@ output:
description: Log file from RiboDetector
pattern: "*.log"
ontologies: []
versions_ribodetector:
- - ${task.process}:
type: string
description: Name of the process
- ribodetector:
type: string
description: Name of the tool
- ribodetector --version | sed "s/ribodetector //:
type: string
description: Version of ribodetector used
topics:
versions:
- versions.yml:
type: file
description: File containing software versions
pattern: versions.yml
ontologies:
- edam: http://edamontology.org/format_3750 # YAML
- - ${task.process}:
type: string
description: Name of the process
- ribodetector:
type: string
description: Name of the tool
- ribodetector --version | sed "s/ribodetector //:
type: string
description: Version of ribodetector used
authors:
- "@maxibor"
maintainers:
Expand Down
4 changes: 2 additions & 2 deletions modules/nf-core/ribodetector/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ nextflow_process {
{ assert process.success },
{ assert process.out.fastq },
{ assert process.out.log },
{ assert path(process.out.log[0][1]).getText().contains("Writing output non-rRNA sequences") },
{ assert snapshot(process.out.versions).match() }
{ assert path(process.out.log[0][1]).getText().contains("Writing output non-rRNA sequences") }
// Note: versions collected via topic, not snapshotted
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be snapshotting the versions. There's a snippet for that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, it was set to auto-merge! I will open a PR with a fix.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

)
}

Expand Down
28 changes: 12 additions & 16 deletions modules/nf-core/ribodetector/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,16 +1,4 @@
{
"ribodetector - rnaseq PE input": {
"content": [
[
"versions.yml:md5,f98df8f0eaa704e4db74785adc9cc791"
]
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.0"
},
"timestamp": "2025-11-07T13:20:15.909875"
},
"ribodetector - stub rnaseq PE input": {
"content": [
{
Expand All @@ -36,7 +24,11 @@
]
],
"2": [
"versions.yml:md5,f98df8f0eaa704e4db74785adc9cc791"
[
"RIBODETECTOR",
"ribodetector",
"0.3.2"
]
],
"fastq": [
[
Expand All @@ -59,15 +51,19 @@
"test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,f98df8f0eaa704e4db74785adc9cc791"
"versions_ribodetector": [
[
"RIBODETECTOR",
"ribodetector",
"0.3.2"
]
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.0"
},
"timestamp": "2025-11-07T13:20:26.026547"
"timestamp": "2025-11-29T20:07:13.509994907"
}
}
102 changes: 18 additions & 84 deletions subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit'
include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main'
include { RIBODETECTOR } from '../../../modules/nf-core/ribodetector/main'
include { SEQKIT_STATS } from '../../../modules/nf-core/seqkit/stats/main'
include { SORTMERNA } from '../../../modules/nf-core/sortmerna/main'
include { SORTMERNA as SORTMERNA_INDEX } from '../../../modules/nf-core/sortmerna/main'
include { FQ_LINT } from '../../../modules/nf-core/fq/lint/main'
include { FQ_LINT as FQ_LINT_AFTER_TRIMMING } from '../../../modules/nf-core/fq/lint/main'
include { FQ_LINT as FQ_LINT_AFTER_BBSPLIT } from '../../../modules/nf-core/fq/lint/main'
include { FQ_LINT as FQ_LINT_AFTER_RIBO_REMOVAL } from '../../../modules/nf-core/fq/lint/main'
include { FASTQ_REMOVE_RRNA } from '../fastq_remove_rrna'
include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../fastq_subsample_fq_salmon'
include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../fastq_fastqc_umitools_trimgalore'
include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../fastq_fastqc_umitools_fastp'
Expand Down Expand Up @@ -84,29 +81,6 @@ def multiqcTsvFromList(tsv_data, header) {
return tsv_string
}

//
// Derive the read length passed to RiboDetector's -l parameter from a
// seqkit stats TSV: the rounded mean of the 'avg_len' column over all
// data rows. Returns 100 when the table has no data rows or when the
// 'avg_len' column is absent from the header.
//
def getReadLengthFromSeqkitStats(stats_file) {
    def fallbackLength = 100
    def rows = stats_file.text.readLines()

    // A usable table needs a header line plus at least one data row
    if (rows.size() >= 2) {
        def avgLenColumn = rows[0].split('\t').findIndexOf { name -> name == 'avg_len' }

        if (avgLenColumn >= 0) {
            // One avg_len value per data row (i.e. per file listed in the stats output)
            def perFileAverages = rows.drop(1).collect { row ->
                def fields = row.split('\t')
                fields[avgLenColumn] as float
            }
            def overallMean = perFileAverages.sum() / perFileAverages.size()

            return Math.round(overallMean) as int
        }
    }

    return fallbackLength
}

workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
take:
// Input channels
Expand All @@ -116,8 +90,9 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
ch_gtf // channel: /path/to/genome.gtf
ch_salmon_index // channel: /path/to/salmon/index/ (optional)
ch_sortmerna_index // channel: /path/to/sortmerna/index/ (optional)
ch_bowtie2_index // channel: /path/to/bowtie2/index/ (optional)
ch_bbsplit_index // channel: /path/to/bbsplit/index/ (optional)
ch_rrna_fastas // channel: one or more fasta files containing rrna sequences to be passed to SortMeRNA (optional)
ch_rrna_fastas // channel: one or more fasta files containing rrna sequences to be passed to SortMeRNA/Bowtie2 (optional)

// Skip options
skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads.
Expand All @@ -129,6 +104,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
// Index generation
make_salmon_index // boolean: Whether to create salmon index before running salmon quant
make_sortmerna_index // boolean: Whether to create a sortmerna index before running sortmerna
make_bowtie2_index // boolean: Whether to create a bowtie2 index before running bowtie2

// Trimming options
trimmer // string (enum): 'fastp' or 'trimgalore'
Expand All @@ -138,7 +114,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {

// rRNA removal options
remove_ribo_rna // boolean: true/false: whether to remove rRNA
ribo_removal_tool // string (enum): 'sortmerna' or 'ribodetector'
ribo_removal_tool // string (enum): 'sortmerna', 'ribodetector', or 'bowtie2'

// UMI options
with_umi // boolean: true/false: Enable UMI-based read deduplication.
Expand Down Expand Up @@ -294,64 +270,22 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
}

//
// MODULE: Remove ribosomal RNA reads
// SUBWORKFLOW: Remove ribosomal RNA reads
//
if (remove_ribo_rna) {
if (ribo_removal_tool == 'sortmerna') {
ch_sortmerna_fastas = ch_rrna_fastas
.collect()
.map { [[id: 'rrna_refs'], it] }

if (make_sortmerna_index) {
SORTMERNA_INDEX(
[[], []],
ch_sortmerna_fastas,
[[], []],
)
ch_sortmerna_index = SORTMERNA_INDEX.out.index.first()
}

SORTMERNA(
ch_filtered_reads,
ch_sortmerna_fastas,
ch_sortmerna_index,
)

SORTMERNA.out.reads.set { ch_filtered_reads }

ch_multiqc_files = ch_multiqc_files.mix(SORTMERNA.out.log)

ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
}
else if (ribo_removal_tool == 'ribodetector') {
// Run seqkit stats to determine average read length
SEQKIT_STATS(
ch_filtered_reads
)

ch_versions = ch_versions.mix(SEQKIT_STATS.out.versions.first())

// Join stats with reads and calculate read length for RiboDetector
ch_filtered_reads
.join(SEQKIT_STATS.out.stats)
.multiMap { meta, reads, stats ->
def readLength = getReadLengthFromSeqkitStats(stats)
reads: [meta, reads]
length: readLength
}
.set { ch_reads_with_length }

RIBODETECTOR(
ch_reads_with_length.reads,
ch_reads_with_length.length,
)

RIBODETECTOR.out.fastq.set { ch_filtered_reads }

ch_multiqc_files = ch_multiqc_files.mix(RIBODETECTOR.out.log)
FASTQ_REMOVE_RRNA(
ch_filtered_reads,
ch_rrna_fastas,
ch_sortmerna_index,
ch_bowtie2_index,
ribo_removal_tool,
make_sortmerna_index,
make_bowtie2_index,
)

ch_versions = ch_versions.mix(RIBODETECTOR.out.versions.first())
}
ch_filtered_reads = FASTQ_REMOVE_RRNA.out.reads
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_REMOVE_RRNA.out.multiqc_files)
ch_versions = ch_versions.mix(FASTQ_REMOVE_RRNA.out.versions)

if (!skip_linting) {
FQ_LINT_AFTER_RIBO_REMOVAL(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,9 @@ keywords:
- strandedness
components:
- bbmap/bbsplit
- samtools/sort
- samtools/index
- cat
- cat/fastq
- fq/lint
- ribodetector
- seqkit/stats
- sortmerna
- fastq_remove_rrna
- fastq_subsample_fq_salmon
- fastq_fastqc_umitools_trimgalore
- fastq_fastqc_umitools_fastp
Expand Down Expand Up @@ -79,6 +74,15 @@ input:
- index:
type: directory
description: SortMeRNA index directory
- ch_bowtie2_index:
description: Directory containing bowtie2 index for rRNA removal
structure:
- meta:
type: map
description: Metadata for the Bowtie2 index
- index:
type: directory
description: Bowtie2 index directory
- ch_bbsplit_index:
description: Path to directory or tar.gz archive for pre-built BBSplit index
structure:
Expand All @@ -90,7 +94,7 @@ input:
description: BBSplit index directory or tar.gz archive
pattern: "{*,*.tar.gz}"
- ch_rrna_fastas:
description: Channel containing one or more FASTA files containing rRNA sequences for use with SortMeRNA
description: Channel containing one or more FASTA files containing rRNA sequences for use with SortMeRNA or Bowtie2
structure:
- meta:
type: map
Expand Down Expand Up @@ -120,6 +124,9 @@ input:
- make_sortmerna_index:
type: boolean
description: Whether to create sortmerna index before running sortmerna
- make_bowtie2_index:
type: boolean
description: Whether to create bowtie2 index before running bowtie2 for rRNA removal
- trimmer:
type: string
description: Specifies the trimming tool to use
Expand All @@ -140,7 +147,7 @@ input:
- ribo_removal_tool:
type: string
description: Specifies the rRNA removal tool to use
enum: ["sortmerna", "ribodetector"]
enum: ["sortmerna", "ribodetector", "bowtie2"]
- with_umi:
type: boolean
description: Enable UMI-based read deduplication
Expand Down
Loading
Loading