Skip to content

Commit 73468c8

Browse files
pinin4fjords and claude committed
feat(fastq_qc_trim_filter_setstrandedness): Add Bowtie2 as alternative rRNA removal tool
Add bowtie2 as a third option for rRNA removal alongside sortmerna and ribodetector.

Implementation details:
- Paired-end: Uses samtools view -f 12 to keep only pairs where BOTH mates are unmapped (bowtie2's --un-conc-gz writes every pair that failed to align concordantly, which also includes pairs where one mate aligned)
- Single-end: Uses bowtie2's --un-gz directly via save_unaligned=true
- Converts U→T in rRNA reference FASTAs (RNA sequences contain U, reads contain T)

Changes:
- Add BOWTIE2_ALIGN, BOWTIE2_ALIGN_PE, BOWTIE2_BUILD module imports
- Add SAMTOOLS_VIEW and SAMTOOLS_FASTQ for paired-end filtering
- Add ch_bowtie2_index input and make_bowtie2_index parameter
- Update meta.yml with bowtie2 in ribo_removal_tool enum
- Add paired-end and single-end bowtie2 test cases

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent f3fb87a commit 73468c8

File tree

5 files changed

+406
-61
lines changed

5 files changed

+406
-61
lines changed

subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit'
2+
include { BOWTIE2_ALIGN } from '../../../modules/nf-core/bowtie2/align/main'
3+
include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_PE } from '../../../modules/nf-core/bowtie2/align/main'
4+
include { BOWTIE2_BUILD } from '../../../modules/nf-core/bowtie2/build/main'
25
include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main'
6+
include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_BOWTIE2 } from '../../../modules/nf-core/samtools/view/main'
7+
include { SAMTOOLS_FASTQ as SAMTOOLS_FASTQ_BOWTIE2 } from '../../../modules/nf-core/samtools/fastq/main'
38
include { RIBODETECTOR } from '../../../modules/nf-core/ribodetector/main'
49
include { SEQKIT_STATS } from '../../../modules/nf-core/seqkit/stats/main'
510
include { SORTMERNA } from '../../../modules/nf-core/sortmerna/main'
@@ -116,8 +121,9 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
116121
ch_gtf // channel: /path/to/genome.gtf
117122
ch_salmon_index // channel: /path/to/salmon/index/ (optional)
118123
ch_sortmerna_index // channel: /path/to/sortmerna/index/ (optional)
124+
ch_bowtie2_index // channel: /path/to/bowtie2/index/ (optional)
119125
ch_bbsplit_index // channel: /path/to/bbsplit/index/ (optional)
120-
ch_rrna_fastas // channel: one or more fasta files containing rrna sequences to be passed to SortMeRNA (optional)
126+
ch_rrna_fastas // channel: one or more fasta files containing rrna sequences to be passed to SortMeRNA/Bowtie2 (optional)
121127

122128
// Skip options
123129
skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads.
@@ -129,6 +135,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
129135
// Index generation
130136
make_salmon_index // boolean: Whether to create salmon index before running salmon quant
131137
make_sortmerna_index // boolean: Whether to create a sortmerna index before running sortmerna
138+
make_bowtie2_index // boolean: Whether to create a bowtie2 index before running bowtie2
132139

133140
// Trimming options
134141
trimmer // string (enum): 'fastp' or 'trimgalore'
@@ -138,7 +145,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
138145

139146
// rRNA removal options
140147
remove_ribo_rna // boolean: true/false: whether to remove rRNA
141-
ribo_removal_tool // string (enum): 'sortmerna' or 'ribodetector'
148+
ribo_removal_tool // string (enum): 'sortmerna', 'ribodetector', or 'bowtie2'
142149

143150
// UMI options
144151
with_umi // boolean: true/false: Enable UMI-based read deduplication.
@@ -352,6 +359,86 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
352359

353360
ch_versions = ch_versions.mix(RIBODETECTOR.out.versions.first())
354361
}
362+
else if (ribo_removal_tool == 'bowtie2') {
363+
if (make_bowtie2_index) {
364+
// Collect all fastas into a single file for index building
365+
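// Convert U to T since rRNA references may contain RNA (U) but reads are DNA (T). NOTE(review): the replacement below runs over the whole file text, so FASTA header lines (">...") are rewritten as well as sequence lines.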
366+
ch_rrna_fastas
367+
.collectFile(name: 'rrna_combined.fasta', newLine: true)
368+
.map { fasta ->
369+
def content = fasta.text.replaceAll('U', 'T').replaceAll('u', 't')
370+
def convertedFasta = file("${fasta.parent}/rrna_combined_dna.fasta")
371+
convertedFasta.text = content
372+
[[id: 'rrna_refs'], convertedFasta]
373+
}
374+
.set { ch_combined_fasta }
375+
376+
BOWTIE2_BUILD(
377+
ch_combined_fasta
378+
)
379+
ch_bowtie2_index = BOWTIE2_BUILD.out.index.first()
380+
ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions.first())
381+
}
382+
383+
// Branch reads by single-end vs paired-end for different filtering strategies
384+
ch_filtered_reads
385+
.branch { meta, reads ->
386+
single_end: meta.single_end
387+
paired_end: !meta.single_end
388+
}
389+
.set { ch_reads_for_bowtie2 }
390+
391+
// For single-end reads: bowtie2's --un-gz works correctly
392+
// save_unaligned=true outputs unmapped reads directly
393+
BOWTIE2_ALIGN(
394+
ch_reads_for_bowtie2.single_end,
395+
ch_bowtie2_index,
396+
[[], []], // No reference fasta needed
397+
true, // save_unaligned - for single-end this works correctly
398+
false, // sort_bam - not needed
399+
)
400+
401+
ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_ALIGN.out.log)
402+
ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first())
403+
404+
// For paired-end reads: bowtie2's --un-conc-gz outputs pairs that didn't
405+
// align concordantly, which INCLUDES pairs where one mate aligned.
406+
// We need to filter via samtools to get pairs where BOTH mates are unmapped.
407+
BOWTIE2_ALIGN_PE(
408+
ch_reads_for_bowtie2.paired_end,
409+
ch_bowtie2_index,
410+
[[], []], // No reference fasta needed for BAM output
411+
false, // save_unaligned - we'll extract from BAM instead
412+
false, // sort_bam - not needed
413+
)
414+
415+
ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_ALIGN_PE.out.log)
416+
ch_versions = ch_versions.mix(BOWTIE2_ALIGN_PE.out.versions.first())
417+
418+
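// Keep only read pairs where BOTH mates are unmapped (flag 12 = 4 + 8). NOTE(review): no '-f 12' argument is passed in this call, so it is presumably supplied through module configuration (ext.args) — confirm.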
419+
// This removes any pair where at least one mate aligned to rRNA
420+
SAMTOOLS_VIEW_BOWTIE2(
421+
BOWTIE2_ALIGN_PE.out.bam.map { meta, bam -> [meta, bam, []] },
422+
[[], []], // No reference fasta
423+
[], // No qname file
424+
[] // No index format
425+
)
426+
427+
ch_versions = ch_versions.mix(SAMTOOLS_VIEW_BOWTIE2.out.versions.first())
428+
429+
// Convert filtered BAM back to paired FASTQ
430+
SAMTOOLS_FASTQ_BOWTIE2(
431+
SAMTOOLS_VIEW_BOWTIE2.out.bam,
432+
false // not interleaved
433+
)
434+
435+
ch_versions = ch_versions.mix(SAMTOOLS_FASTQ_BOWTIE2.out.versions.first())
436+
437+
// Combine single-end and paired-end results
438+
BOWTIE2_ALIGN.out.fastq
439+
.mix(SAMTOOLS_FASTQ_BOWTIE2.out.fastq)
440+
.set { ch_filtered_reads }
441+
}
355442

356443
if (!skip_linting) {
357444
FQ_LINT_AFTER_RIBO_REMOVAL(

subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@ keywords:
99
- strandedness
1010
components:
1111
- bbmap/bbsplit
12+
- bowtie2/align
13+
- bowtie2/build
14+
- samtools/fastq
1215
- samtools/sort
1316
- samtools/index
17+
- samtools/view
1418
- cat
1519
- cat/fastq
1620
- fq/lint
@@ -79,6 +83,15 @@ input:
7983
- index:
8084
type: directory
8185
description: SortMeRNA index directory
86+
- ch_bowtie2_index:
87+
description: Directory containing bowtie2 index for rRNA removal
88+
structure:
89+
- meta:
90+
type: map
91+
description: Metadata for the Bowtie2 index
92+
- index:
93+
type: directory
94+
description: Bowtie2 index directory
8295
- ch_bbsplit_index:
8396
description: Path to directory or tar.gz archive for pre-built BBSplit index
8497
structure:
@@ -90,7 +103,7 @@ input:
90103
description: BBSplit index directory or tar.gz archive
91104
pattern: "{*,*.tar.gz}"
92105
- ch_rrna_fastas:
93-
description: Channel containing one or more FASTA files containing rRNA sequences for use with SortMeRNA
106+
description: Channel containing one or more FASTA files containing rRNA sequences for use with SortMeRNA or Bowtie2
94107
structure:
95108
- meta:
96109
type: map
@@ -120,6 +133,9 @@ input:
120133
- make_sortmerna_index:
121134
type: boolean
122135
description: Whether to create sortmerna index before running sortmerna
136+
- make_bowtie2_index:
137+
type: boolean
138+
description: Whether to create bowtie2 index before running bowtie2 for rRNA removal
123139
- trimmer:
124140
type: string
125141
description: Specifies the trimming tool to use
@@ -140,7 +156,7 @@ input:
140156
- ribo_removal_tool:
141157
type: string
142158
description: Specifies the rRNA removal tool to use
143-
enum: ["sortmerna", "ribodetector"]
159+
enum: ["sortmerna", "ribodetector", "bowtie2"]
144160
- with_umi:
145161
type: boolean
146162
description: Enable UMI-based read deduplication

0 commit comments

Comments
 (0)