Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
8a37c4f
add CREATE_ASSEMBLY_METADATA_CSV module, add versions tracking for al…
ochkalova Apr 24, 2026
4bb213f
add CREATE_GENOME_METADATA_TSV module
ochkalova Apr 24, 2026
6340b4b
refactor metadata files creating modules
ochkalova Apr 27, 2026
7cb2880
add find_concatenate module
ochkalova Apr 28, 2026
c6b7fbc
patch find/concatenate
ochkalova Apr 28, 2026
8304fbf
fix version statement for GENERATE_ASSEMBLY_MANIFEST module
ochkalova Apr 28, 2026
f217ac8
add FIND_CONCATENATE to assemblysubmit and update tests
ochkalova Apr 28, 2026
48abbfd
add FIND_CONCATENATE to genomesubmit and update tests
ochkalova Apr 28, 2026
badfb2b
add additional inputs instead of params usage subwf
ochkalova Apr 24, 2026
47b8698
add additional inputs instead of params usage in ENA_WEBIN_CLI_WRAPPER
ochkalova Apr 24, 2026
c410d5e
add additional inputs in GENOME_UPLOAD
ochkalova Apr 24, 2026
708cb1e
add additional inputs in GENERATE_ASSEMBLY_MANIFEST
ochkalova Apr 24, 2026
fd698fd
update tests
ochkalova Apr 24, 2026
689259a
fix checkm2_db definition
ochkalova Apr 24, 2026
74fa995
remove --upload_force param because it's unnecessary for a user
ochkalova Apr 24, 2026
1371dec
do not reference params in ASSEMBLYSUBMIT and GENOMESUBMIT
ochkalova Apr 27, 2026
93439b4
add test_upload parameter to REGISTERSTUDY and GENERATE_ASSEMBLY_MANI…
ochkalova Apr 27, 2026
ff2d46d
rename checkm2_db_zenodo_id to checkm2_db_download_id for consistency
ochkalova Apr 27, 2026
7a6030c
add missing REGISTERSTUDY input
ochkalova Apr 27, 2026
4cf9a0e
rename webincli_submit to webincli_mode
ochkalova Apr 28, 2026
4530c73
add CREATE_ASSEMBLY_METADATA_CSV module, add versions tracking for al…
ochkalova Apr 24, 2026
0b2d785
add CREATE_GENOME_METADATA_TSV module
ochkalova Apr 24, 2026
c7b2603
add FIND_CONCATENATE to assemblysubmit and update tests
ochkalova Apr 28, 2026
5e435bf
update snapshot
ochkalova Apr 28, 2026
555e4a1
Merge branch 'dev' into feat/metadata_modules
ochkalova Apr 28, 2026
aafeff6
update snapshot
ochkalova Apr 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,20 @@ process {
// SUBMISSION AND MANIFEST GENERATION
//

// Outputs of the metadata-creation modules are not published.
withName: 'CREATE_ASSEMBLY_METADATA_CSV|CREATE_GENOME_METADATA_TSV' {
    publishDir = [enabled: false]
}

// Publish the concatenation outputs under <outdir>/<mode>/upload, leaving out versions.yml.
withName: 'CONCAT_METADATA|CONCAT_ACCESSIONS' {
    publishDir = [
        path: { "${params.outdir}/${params.mode}/upload" },
        mode: params.publish_dir_mode,
        saveAs: { published_name ->
            // Returning null tells Nextflow not to publish this file.
            if (published_name.equals('versions.yml')) {
                return null
            }
            return published_name
        }
    ]
}

withName: 'GENOME_UPLOAD' {
ext.args = "--force" // ensures that ENA metadata is re-downloaded even if cache exists
publishDir = [
Expand Down
6 changes: 4 additions & 2 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ When `--mode mags` or `--mode bins` is used, results are written under `mags/` o
<summary>Output files</summary>

- `<mode>/`
- `genomes_metadata.csv`: tabular metadata assembled for submission.
- `upload/assigned_accessions.tsv`: accessions assigned to submitted genomes.
- `upload/genomes_metadata.tsv`: tabular metadata gathered for submission.
- `upload/manifests/`: manifests generated by `genome_uploader`.
- `databases/checkm2/`: downloaded CheckM2 database file (when downloaded during the run).
- `databases/cat_pack/`: prepared CAT_pack database directories (when generated during the run).
Expand All @@ -41,7 +42,8 @@ When `--mode metagenomic_assemblies` is used, results are written under `metagen
<summary>Output files</summary>

- `metagenomic_assemblies/`
- `<sample>_assembly_metadata.csv`: per-assembly metadata CSV generated for manifest creation.
- `upload/assigned_accessions.tsv`: accessions assigned to submitted assemblies.
- `upload/assemblies_metadata.csv`: metadata CSV generated for manifest creation.
- `<sample>/coverage/`: `coverm contig` output for samples where `coverage` had to be calculated from reads.

</details>
Expand Down
6 changes: 6 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@
"installed_by": ["modules"],
"patch": "modules/nf-core/fastavalidator/fastavalidator.diff"
},
"find/concatenate": {
"branch": "master",
"git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120",
"installed_by": ["modules"],
"patch": "modules/nf-core/find/concatenate/find-concatenate.diff"
},
"multiqc": {
"branch": "master",
"git_sha": "af27af1be706e6a2bb8fe454175b0cdf77f47b49",
Expand Down
6 changes: 6 additions & 0 deletions modules/local/create_assembly_metadata_csv/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::bash=5.2.37
51 changes: 51 additions & 0 deletions modules/local/create_assembly_metadata_csv/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
 * CREATE_ASSEMBLY_METADATA_CSV
 *
 * Writes a two-line CSV (header + one row) named "<meta.id>_assembly_metadata.csv"
 * describing a single assembly. The row is built from the meta map
 * (run_accession, coverage, assembler, assembler_version) and the FASTA file
 * name; the FASTA content itself is never read.
 *
 * Input:
 *   meta  - map providing id, run_accession, coverage, assembler, assembler_version
 *   fasta - assembly FASTA file; only its name is written to the "Filepath" column
 *
 * Output:
 *   csv      - tuple of meta and the generated CSV file
 *   versions - versions.yml recording the bash version
 */
process CREATE_ASSEMBLY_METADATA_CSV {
    tag "${meta.id}"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    // NOTE(review): the Singularity image is pinned to bash 5.2.26 (no build hash) while the
    // Docker image and environment.yml use 5.2.37 - confirm whether this mismatch is intended.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'oras://community.wave.seqera.io/library/bash:5.2.26' :
        'community.wave.seqera.io/library/bash:5.2.37--ae00789afb795adf' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("${meta.id}_assembly_metadata.csv"), emit: csv
    path "versions.yml"                                       , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def header = 'Runs,Coverage,Assembler,Version,Filepath,Sample'
    def row = [
        meta.run_accession,
        meta.coverage,
        meta.assembler,
        meta.assembler_version,
        fasta.name,
        '' // Sample column left empty because co-assemblies are not supported
    ].collect { value ->
        // A missing value becomes an empty field instead of the literal text "null".
        def text = value == null ? '' : value.toString()
        // Quote fields that would otherwise break the column layout (RFC 4180 style):
        // wrap in double quotes and double any embedded quote.
        def needsQuoting = ['"', ',', '\n', '\r'].any { special -> text.contains(special) }
        needsQuoting ? '"' + text.replace('"', '""') + '"' : text
    }.join(',')
    // The CSV heredoc delimiter is quoted so that bash writes the already-interpolated
    // header and row verbatim, without expanding any '$' or backticks they may contain.
    """
    cat <<-'END_CSV' > ${meta.id}_assembly_metadata.csv
    ${header}
    ${row}
    END_CSV

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bash: \$(bash --version | head -n1 | sed 's/.*version //; s/ .*//')
    END_VERSIONS
    """

    stub:
    // The stub only creates an empty CSV placeholder plus the versions file.
    """
    touch ${meta.id}_assembly_metadata.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bash: \$(bash --version | head -n1 | sed 's/.*version //; s/ .*//')
    END_VERSIONS
    """
}
48 changes: 48 additions & 0 deletions modules/local/create_assembly_metadata_csv/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "create_assembly_metadata_csv"
description: Create the input CSV metadata file for the assembly_uploader tool
keywords:
- assembly
- metadata
- ena
- submission
tools:
- bash:
description: Bash shell scripting
homepage: https://www.gnu.org/software/bash/

input:
- - meta:
type: map
description: |
Groovy Map containing sample information:
- id: Sample identifier
- run_accession: ENA run accession
- coverage: Assembly coverage value
- assembler: Name of the assembler used
- assembler_version: Version of the assembler
- fasta:
type: file
description: Assembly FASTA file
pattern: "*.{fasta,fa,fna}"

output:
- csv:
- meta:
type: map
description: |
Groovy Map containing sample information (same as input)
- "*.csv":
type: file
description: CSV file containing assembly metadata to be used as input for the assembly_uploader
pattern: "*_assembly_metadata.csv"
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"

authors:
- "@ochkalova"
maintainers:
- "@ochkalova"
69 changes: 69 additions & 0 deletions modules/local/create_assembly_metadata_csv/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// nf-test suite for the CREATE_ASSEMBLY_METADATA_CSV process.
// It contains one regular run and one stub run; each asserts that the process
// succeeds and that all of its outputs match the stored snapshot.
nextflow_process {

name "Test Process CREATE_ASSEMBLY_METADATA_CSV"
script "../main.nf"
process "CREATE_ASSEMBLY_METADATA_CSV"

tag "modules"
tag "modules_local"
tag "create_assembly_metadata_csv"

// Regular run: passes a meta map with every field the process reads, plus a FASTA file.
// NOTE(review): the FASTA is referenced by remote URL, so this test presumably needs
// network access - confirm.
test("test_create_assembly_metadata_csv") {

when {
process {
"""
input[0] = [
[
id: 'test_sample',
run_accession: 'ERR123456',
coverage: 50.5,
assembler: 'metaspades',
assembler_version: '3.15.5'
],
file('https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz')
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Stub run: same input shape with different meta values, executed with the -stub option.
test("test_create_assembly_metadata_csv - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[
id: 'stub_sample',
run_accession: 'ERR999999',
coverage: 75.0,
assembler: 'megahit',
assembler_version: '1.2.9'
],
file('https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz')
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
84 changes: 84 additions & 0 deletions modules/local/create_assembly_metadata_csv/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
"test_create_assembly_metadata_csv": {
"content": [
{
"0": [
[
{
"id": "test_sample",
"run_accession": "ERR123456",
"coverage": 50.5,
"assembler": "metaspades",
"assembler_version": "3.15.5"
},
"test_sample_assembly_metadata.csv:md5,0ad41b6b4e778fb03fc224bae9ba4529"
]
],
"1": [
"versions.yml:md5,1dbd8a6b5ae469b5f51d5c96b7ab3225"
],
"csv": [
[
{
"id": "test_sample",
"run_accession": "ERR123456",
"coverage": 50.5,
"assembler": "metaspades",
"assembler_version": "3.15.5"
},
"test_sample_assembly_metadata.csv:md5,0ad41b6b4e778fb03fc224bae9ba4529"
]
],
"versions": [
"versions.yml:md5,1dbd8a6b5ae469b5f51d5c96b7ab3225"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "25.04.1"
},
"timestamp": "2026-04-27T14:51:27.130061"
},
"test_create_assembly_metadata_csv - stub": {
"content": [
{
"0": [
[
{
"id": "stub_sample",
"run_accession": "ERR999999",
"coverage": 75.0,
"assembler": "megahit",
"assembler_version": "1.2.9"
},
"stub_sample_assembly_metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,1dbd8a6b5ae469b5f51d5c96b7ab3225"
],
"csv": [
[
{
"id": "stub_sample",
"run_accession": "ERR999999",
"coverage": 75.0,
"assembler": "megahit",
"assembler_version": "1.2.9"
},
"stub_sample_assembly_metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,1dbd8a6b5ae469b5f51d5c96b7ab3225"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "25.04.1"
},
"timestamp": "2026-04-27T14:51:32.249848"
}
}
6 changes: 6 additions & 0 deletions modules/local/create_genome_metadata_tsv/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::bash=5.2.37
Loading
Loading