11include { BBMAP_BBSPLIT } from ' ../../../modules/nf-core/bbmap/bbsplit'
2+ include { BOWTIE2_ALIGN } from ' ../../../modules/nf-core/bowtie2/align/main'
3+ include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_PE } from ' ../../../modules/nf-core/bowtie2/align/main'
4+ include { BOWTIE2_BUILD } from ' ../../../modules/nf-core/bowtie2/build/main'
25include { CAT_FASTQ } from ' ../../../modules/nf-core/cat/fastq/main'
6+ include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_BOWTIE2 } from ' ../../../modules/nf-core/samtools/view/main'
7+ include { SAMTOOLS_FASTQ as SAMTOOLS_FASTQ_BOWTIE2 } from ' ../../../modules/nf-core/samtools/fastq/main'
38include { RIBODETECTOR } from ' ../../../modules/nf-core/ribodetector/main'
49include { SEQKIT_STATS } from ' ../../../modules/nf-core/seqkit/stats/main'
510include { SORTMERNA } from ' ../../../modules/nf-core/sortmerna/main'
@@ -116,8 +121,9 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
116121 ch_gtf // channel: /path/to/genome.gtf
117122 ch_salmon_index // channel: /path/to/salmon/index/ (optional)
118123 ch_sortmerna_index // channel: /path/to/sortmerna/index/ (optional)
124+ ch_bowtie2_index // channel: /path/to/bowtie2/index/ (optional)
119125 ch_bbsplit_index // channel: /path/to/bbsplit/index/ (optional)
120- ch_rrna_fastas // channel: one or more fasta files containing rrna sequences to be passed to SortMeRNA (optional)
126+ ch_rrna_fastas // channel: one or more fasta files containing rrna sequences to be passed to SortMeRNA/Bowtie2 (optional)
121127
122128 // Skip options
123129 skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads.
@@ -129,6 +135,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
129135 // Index generation
130136 make_salmon_index // boolean: Whether to create salmon index before running salmon quant
131137 make_sortmerna_index // boolean: Whether to create a sortmerna index before running sortmerna
138+ make_bowtie2_index // boolean: Whether to create a bowtie2 index before running bowtie2
132139
133140 // Trimming options
134141 trimmer // string (enum): 'fastp' or 'trimgalore'
@@ -138,7 +145,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
138145
139146 // rRNA removal options
140147 remove_ribo_rna // boolean: true/false: whether to remove rRNA
141- ribo_removal_tool // string (enum): 'sortmerna' or 'ribodetector '
148+ ribo_removal_tool // string (enum): 'sortmerna', 'ribodetector', or 'bowtie2'
142149
143150 // UMI options
144151 with_umi // boolean: true/false: Enable UMI-based read deduplication.
@@ -352,6 +359,86 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
352359
353360 ch_versions = ch_versions. mix(RIBODETECTOR . out. versions. first())
354361 }
else if (ribo_removal_tool == 'bowtie2') {
    // rRNA removal with Bowtie2: align reads against the rRNA references and
    // keep only the reads (or read pairs) that did not align.
    if (make_bowtie2_index) {
        // Collect all rRNA fastas into a single file for index building.
        // rRNA references may be written as RNA (U) while reads are DNA (T),
        // so the sequence lines are converted U -> T before indexing.
        ch_rrna_fastas
            .collectFile(name: 'rrna_combined.fasta', newLine: true)
            .map { fasta ->
                def convertedFasta = file("${fasta.parent}/rrna_combined_dna.fasta")
                // Stream line by line instead of loading the whole reference into
                // memory, and leave '>' header lines untouched so that sequence
                // identifiers containing U/u are not rewritten.
                convertedFasta.withWriter { writer ->
                    fasta.eachLine { line ->
                        def converted = line.startsWith('>')
                            ? line
                            : line.replace('U', 'T').replace('u', 't')
                        writer << converted << '\n'
                    }
                }
                [[id: 'rrna_refs'], convertedFasta]
            }
            .set { ch_combined_fasta }

        BOWTIE2_BUILD (
            ch_combined_fasta
        )
        // The freshly built index replaces whatever was passed in via ch_bowtie2_index.
        ch_bowtie2_index = BOWTIE2_BUILD.out.index.first()
        ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions.first())
    }

    // Branch reads by single-end vs paired-end: the two layouts need
    // different strategies for extracting the non-rRNA reads.
    ch_filtered_reads
        .branch { meta, reads ->
            single_end: meta.single_end
            paired_end: !meta.single_end
        }
        .set { ch_reads_for_bowtie2 }

    // Single-end reads: bowtie2's --un-gz output is exactly the unmapped reads,
    // so save_unaligned=true yields the filtered FASTQ directly.
    BOWTIE2_ALIGN (
        ch_reads_for_bowtie2.single_end,
        ch_bowtie2_index,
        [[], []], // No reference fasta needed
        true,     // save_unaligned - for single-end this works correctly
        false,    // sort_bam - not needed
    )

    ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_ALIGN.out.log)
    ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first())

    // Paired-end reads: bowtie2's --un-conc-gz outputs pairs that did not align
    // concordantly, which INCLUDES pairs where only one mate aligned. To keep
    // only pairs where BOTH mates are unmapped, the BAM is filtered with
    // samtools and converted back to FASTQ instead.
    BOWTIE2_ALIGN_PE (
        ch_reads_for_bowtie2.paired_end,
        ch_bowtie2_index,
        [[], []], // No reference fasta needed for BAM output
        false,    // save_unaligned - reads are extracted from the BAM instead
        false,    // sort_bam - not needed
    )

    ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_ALIGN_PE.out.log)
    ch_versions = ch_versions.mix(BOWTIE2_ALIGN_PE.out.versions.first())

    // Filter the BAM for read pairs where BOTH mates are unmapped
    // (flag 12 = 4 + 8); this drops any pair with at least one mate aligned to rRNA.
    // NOTE(review): the flag filter is not passed in this call - presumably it is
    // supplied through the module configuration for SAMTOOLS_VIEW_BOWTIE2; confirm.
    SAMTOOLS_VIEW_BOWTIE2 (
        BOWTIE2_ALIGN_PE.out.bam.map { meta, bam -> [meta, bam, []] },
        [[], []], // No reference fasta
        [],       // No qname file
        []        // No index format
    )

    ch_versions = ch_versions.mix(SAMTOOLS_VIEW_BOWTIE2.out.versions.first())

    // Convert the filtered BAM back to paired FASTQ
    SAMTOOLS_FASTQ_BOWTIE2 (
        SAMTOOLS_VIEW_BOWTIE2.out.bam,
        false // not interleaved
    )

    ch_versions = ch_versions.mix(SAMTOOLS_FASTQ_BOWTIE2.out.versions.first())

    // Combine single-end and paired-end results into the channel used downstream
    BOWTIE2_ALIGN.out.fastq
        .mix(SAMTOOLS_FASTQ_BOWTIE2.out.fastq)
        .set { ch_filtered_reads }
}
355442
356443 if (! skip_linting) {
357444 FQ_LINT_AFTER_RIBO_REMOVAL (
0 commit comments