diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b290e0901..97c8c97fe 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,20 +1,20 @@ { "name": "nfcore", - "image": "nfcore/gitpod:latest", - "remoteUser": "gitpod", - "runArgs": ["--privileged"], + "image": "nfcore/devcontainer:latest", - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - // Set *default* container specific settings.json values on container create. - "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python" - }, + "remoteUser": "root", + "privileged": true, - // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } + "remoteEnv": { + // Workspace path on the host for mounting with docker-outside-of-docker + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + + "onCreateCommand": "./.devcontainer/setup.sh", + + "hostRequirements": { + "cpus": 4, + "memory": "16gb", + "storage": "32gb" } } diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh new file mode 100755 index 000000000..200013fc0 --- /dev/null +++ b/.devcontainer/setup.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# Customise the terminal command prompt +echo "export PROMPT_DIRTRIM=2" >> $HOME/.bashrc +echo "export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] '" >> $HOME/.bashrc +export PROMPT_DIRTRIM=2 +export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] ' + +# Update Nextflow +nextflow self-update + +# Update welcome message +echo "Welcome to the nf-core/rnaseq devcontainer!" 
> /usr/local/etc/vscode-dev-containers/first-run-notice.txt diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml index 896c0c7ff..49c861366 100644 --- a/.github/actions/nf-test/action.yml +++ b/.github/actions/nf-test/action.yml @@ -25,9 +25,9 @@ runs: version: "${{ env.NXF_VERSION }}" - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install nf-test uses: nf-core/setup-nf-test@v1 @@ -52,6 +52,8 @@ runs: with: auto-update-conda: true conda-solver: libmamba + channels: conda-forge + channel-priority: strict conda-remove-defaults: true # Set up secrets diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 27db9ad30..d3f402cf3 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -31,22 +31,22 @@ jobs: # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + workspace_id: ${{ vars.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + compute_env: ${{ vars.TOWER_COMPUTE_ENV }} revision: ${{ steps.revision.outputs.revision }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnaseq/work-${{ steps.revision.outputs.revision }} + workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/rnaseq/work-${{ steps.revision.outputs.revision }} parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "aligner": "${{ matrix.aligner }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnaseq/results-${{ steps.revision.outputs.revision }}/aligner_${{ matrix.aligner }}/" + "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/rnaseq/results-${{ steps.revision.outputs.revision }}/aligner_${{ 
matrix.aligner }}/" } profiles: test_full - - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: Seqera Platform debug log file path: | - seqera_platform_action_*.log - seqera_platform_action_*.json + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index f5c170fd7..0a85c67f8 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -14,20 +14,20 @@ jobs: - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + workspace_id: ${{ vars.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + compute_env: ${{ vars.TOWER_COMPUTE_ENV }} revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnaseq/work-${{ github.sha }} + workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/rnaseq/work-${{ github.sha }} parameters: | { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnaseq/results-test-${{ github.sha }}" + "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/rnaseq/results-test-${{ github.sha }}" } profiles: test - - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: Seqera Platform debug log file path: | - seqera_platform_action_*.log - seqera_platform_action_*.json + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index ac030fd58..6adb0fff4 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9 + - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10 with: 
stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 999bcc382..45884ff90 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -44,9 +44,9 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" architecture: "x64" - name: Setup Apptainer @@ -57,7 +57,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install git+https://github.com/nf-core/tools.git@dev + pip install git+https://github.com/nf-core/tools.git - name: Make a cache directory for the container images run: | @@ -127,7 +127,7 @@ jobs: fi - name: Upload Nextflow logfile for debugging purposes - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: nextflow_logfile.txt path: .nextflow.log* diff --git a/.github/workflows/fix_linting.yml b/.github/workflows/fix_linting.yml index 62a34d5ef..0d99fdd5b 100644 --- a/.github/workflows/fix_linting.yml +++ b/.github/workflows/fix_linting.yml @@ -13,13 +13,13 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: 
actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: token: ${{ secrets.nf_core_bot_auth_token }} # indication that the linting is being fixed - name: React on comment - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: eyes @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -47,7 +47,7 @@ jobs: # indication that the linting has finished - name: react if linting finished succesfully if: steps.pre-commit.outcome == 'success' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: "+1" @@ -67,21 +67,21 @@ jobs: - name: react if linting errors were fixed id: react-if-fixed if: steps.commit-and-push.outcome == 'success' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: hooray - name: react if linting errors were not fixed if: steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: confused - name: react if linting errors were not fixed if: 
steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: issue-number: ${{ github.event.issue.number }} body: | diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8b0f88c36..7a527a346 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,12 +11,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - - name: Set up Python 3.13 - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Set up Python 3.14 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -28,14 +28,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" architecture: "x64" - name: read .nf-core.yml @@ -71,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index d43797d9d..e6e9bc269 100644 --- a/.github/workflows/linting_comment.yml +++ 
b/.github/workflows/linting_comment.yml @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 + uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index 1560da19d..848696f60 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -1,12 +1,6 @@ name: Run nf-test on: pull_request: - paths-ignore: - - "docs/**" - - "**/meta.yml" - - "**/*.md" - - "**/*.png" - - "**/*.svg" release: types: [published] workflow_dispatch: @@ -18,9 +12,7 @@ concurrency: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - NFT_DIFF: "pdiff" - NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" - NFT_VER: "0.9.2" + NFT_VER: "0.9.3" NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity @@ -42,7 +34,7 @@ jobs: rm -rf ./* || true rm -rf ./.??* || true ls -la ./ - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 @@ -80,14 +72,14 @@ jobs: - isMain: false profile: "singularity" NXF_VER: - - "24.10.5" + - "25.04.0" - "latest-everything" env: NXF_ANSI_LOG: false TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 @@ -101,6 +93,7 @@ jobs: SENTIEON_ENCRYPTION_KEY: ${{ secrets.SENTIEON_ENCRYPTION_KEY }} SENTIEON_LICENSE_MESSAGE: ${{ secrets.SENTIEON_LICENSE_MESSAGE }} SENTIEON_LICSRVR_IP: ${{ secrets.SENTIEON_LICSRVR_IP }} + NXF_VERSION: ${{ matrix.NXF_VER }} with: 
profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} @@ -122,7 +115,7 @@ jobs: fi confirm-pass: - needs: [nf-test] + needs: [nf-test-changes, nf-test] if: always() runs-on: # use self-hosted runners - runs-on=${{ github.run_id }}-confirm-pass @@ -143,7 +136,7 @@ jobs: - name: debug-print if: always() run: | - echo "::group::DEBUG: `needs` Contents" + echo "::group::DEBUG: needs Contents" echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" echo "::endgroup::" diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 0f7324956..3be1e9444 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -14,6 +14,11 @@ jobs: run: | echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT + - name: get description + id: get_description + run: | + echo "description=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .description')" >> $GITHUB_OUTPUT + - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -22,7 +27,7 @@ jobs: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release message: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
- + ${{ steps.get_description.outputs.description }} Please see the changelog: ${{ github.event.release.html_url }} ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics diff --git a/.github/workflows/template-version-comment.yml b/.github/workflows/template-version-comment.yml index beb5c77fb..e8560fc7c 100644 --- a/.github/workflows/template-version-comment.yml +++ b/.github/workflows/template-version-comment.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: ref: ${{ github.event.pull_request.head.sha }} diff --git a/.gitpod.yml b/.gitpod.yml deleted file mode 100644 index 83599f633..000000000 --- a/.gitpod.yml +++ /dev/null @@ -1,10 +0,0 @@ -image: nfcore/gitpod:latest -tasks: - - name: Update Nextflow and setup pre-commit - command: | - pre-commit install --install-hooks - nextflow self-update - -vscode: - extensions: - - nf-core.nf-core-extensionpack # https://github.com/nf-core/vscode-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index 602cb6961..afb529e97 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,4 +1,5 @@ lint: + actions_nf_test: false files_exist: - conf/modules.config files_unchanged: @@ -10,7 +11,7 @@ lint: nextflow_config: - config_defaults: - params.ribo_database_manifest -nf_core_version: 3.3.2 +nf_core_version: 3.5.1 repository_type: pipeline template: author: "Harshil Patel, Phil Ewels, Rickard Hammarén" @@ -21,4 +22,4 @@ template: name: rnaseq org: nf-core outdir: . 
- version: 3.21.0 + version: 3.22.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bb41beec1..d06777a8f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: additional_dependencies: - prettier@3.6.2 - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] diff --git a/.prettierignore b/.prettierignore index edd29f01e..dd749d43d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,4 +10,7 @@ testing/ testing* *.pyc bin/ +.nf-test/ ro-crate-metadata.json +modules/nf-core/ +subworkflows/nf-core/ diff --git a/CHANGELOG.md b/CHANGELOG.md index bc674ab3b..e25b0bc76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,41 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[3.22.0](https://github.com/nf-core/rnaseq/releases/tag/3.22.0)] - 2025-11-26 + +### Credits + +Special thanks to the following for their contributions to the release: + +- [Ahwan Pandey](https://github.com/ahwanpandey) +- [Cristina Tuñí i Domínguez](https://github.com/ctuni) +- [Elad Herzog](https://github.com/EladH1) +- [Emily Miyoshi](https://github.com/emilymiyoshi) +- [Jonathan Manning](https://github.com/pinin4fjords) +- [Pontus Höjer](https://github.com/pontushojer) +- [Siddhartha Bagaria](https://github.com/siddharthab) + +### Enhancements and fixes + +- [PR #1608](https://github.com/nf-core/rnaseq/pull/1608) - Bump version after release 3.21.0 +- [PR #1613](https://github.com/nf-core/rnaseq/pull/1613) - Fix broken link and add latest kit version for Takara UMI prep in usage documentation +- [PR #1614](https://github.com/nf-core/rnaseq/pull/1614) - Template update for nf-core/tools v3.4.1 +- [PR #1617](https://github.com/nf-core/rnaseq/pull/1617) - Update bbmap/bbsplit module +- [PR 
#1618](https://github.com/nf-core/rnaseq/pull/1618) - Fix CI: Ensure confirm-pass job runs for markdown-only PRs +- [PR #1619](https://github.com/nf-core/rnaseq/pull/1619) - Update Credits to reflect current maintainership +- [PR #1620](https://github.com/nf-core/rnaseq/pull/1620) - Fix bigwig strand labeling for reverse-stranded libraries ([#1591](https://github.com/nf-core/rnaseq/issues/1591)) +- [PR #1621](https://github.com/nf-core/rnaseq/pull/1621) - Optimize qualimap performance with multi-threaded name sorting +- [PR #1622](https://github.com/nf-core/rnaseq/pull/1622) - Update tximeta/tximport module to fix sample name mangling +- [PR #1624](https://github.com/nf-core/rnaseq/pull/1624) - Document RSeQC inner_distance limitation for genomes with large chromosomes (>500 Mb), such as plant genomes +- [PR #1625](https://github.com/nf-core/rnaseq/pull/1625) - Add documentation warning about Qualimap read counting bug ([#1273](https://github.com/nf-core/rnaseq/issues/1273)) +- [PR #1628](https://github.com/nf-core/rnaseq/pull/1628) - Template update for nf-core/tools v3.5.1 +- [PR #1630](https://github.com/nf-core/rnaseq/pull/1630) - Fix arm64 profile to use pre-built ARM containers and update documentation +- [PR #1631](https://github.com/nf-core/rnaseq/pull/1631) - Fix bbsplit index staging by using symlinks instead of full copy +- [PR #1632](https://github.com/nf-core/rnaseq/pull/1632) - Add validation error for incompatible `--transcript_fasta` and `--additional_fasta` params ([#1450](https://github.com/nf-core/rnaseq/issues/1450)) +- [PR #1635](https://github.com/nf-core/rnaseq/pull/1635) - Fix `--gtf_extra_attributes` to support multiple comma-separated values and correct deprecated parameter name in docs ([#1626](https://github.com/nf-core/rnaseq/issues/1626)) +- [PR #1636](https://github.com/nf-core/rnaseq/pull/1636) - Simplify workflow nextflow.config by consolidating redundant patterns +- [PR #1638](https://github.com/nf-core/rnaseq/pull/1638) - Bump 
version to 3.22.0 ahead of release + ## [[3.21.0](https://github.com/nf-core/rnaseq/releases/tag/3.21.0)] - 2025-09-18 ### Credits diff --git a/README.md b/README.md index 8b309334f..79619a722 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,13 @@ +[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/rnaseq) [![GitHub Actions CI Status](https://github.com/nf-core/rnaseq/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/rnaseq/actions/workflows/nf-test.yml) [![GitHub Actions Linting Status](https://github.com/nf-core/rnaseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rnaseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnaseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1400710-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1400710) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1) [![run with 
conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -112,7 +113,7 @@ You can find numerous talks on the [nf-core events page](https://nf-co.re/events These scripts were originally written for use at the [National Genomics Infrastructure](https://ngisweden.scilifelab.se), part of [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden, by Phil Ewels ([@ewels](https://github.com/ewels)) and Rickard Hammarén ([@Hammarn](https://github.com/Hammarn)). -The pipeline was re-written in Nextflow DSL2 and is primarily maintained by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/). +The pipeline was re-written in Nextflow DSL2 by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/), and is currently maintained by Jonathan Manning ([@pinin4fjords](https://github.com/pinin4fjords)) and the nf-core community. The pipeline workflow diagram was initially designed by Sarah Guinchard ([@G-Sarah](https://github.com/G-Sarah)) and James Fellows Yates ([@jfy133](https://github.com/jfy133)), further modifications where made by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) and Maxime Garcia ([@maxulysse](https://github.com/maxulysse)). diff --git a/conf/arm.config b/conf/arm.config index bb5b222c3..9f2ed3f1f 100644 --- a/conf/arm.config +++ b/conf/arm.config @@ -64,6 +64,10 @@ process { container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b00189d4f7eed55199354a3dff8a9a535e1dfb3a2f4c97f5d0bf9e388105795e/data' : 'community.wave.seqera.io/library/fq:0.12.0--ad6857b304869ce9' } } + withName: 'FQ_LINT' { + container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b00189d4f7eed55199354a3dff8a9a535e1dfb3a2f4c97f5d0bf9e388105795e/data' : 'community.wave.seqera.io/library/fq:0.12.0--ad6857b304869ce9' } + } + withName: 'GFFREAD' { container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9e/9e6db95013607b07689e38ee37a654d029236de77fdfde97fe1866f45d01e064/data' : 'community.wave.seqera.io/library/gffread:0.12.7--1577aa7c95340d9f' } } @@ -197,11 +201,11 @@ process { } withName: 'STAR_ALIGN' { - container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/40/40d803371e50330de0773c7cc50315e2c3b4b41dcf123823adeb0a07d71654c1/data' : 'community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4' } + container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/10/101ea47973178f85ff66a34de6a7462aaf99d947d3924c27ce8a2d5a63009065/data' : 'community.wave.seqera.io/library/htslib_samtools_star_gawk:de8c848656c2c4c5' } } withName: 'STAR_GENOMEGENERATE' { - container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/40/40d803371e50330de0773c7cc50315e2c3b4b41dcf123823adeb0a07d71654c1/data' : 'community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4' } + container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/10/101ea47973178f85ff66a34de6a7462aaf99d947d3924c27ce8a2d5a63009065/data' : 'community.wave.seqera.io/library/htslib_samtools_star_gawk:de8c848656c2c4c5' } } withName: 'STRINGTIE_STRINGTIE' { @@ -232,6 +236,10 @@ process { container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/38/389312a4a6022c5f5d2510dfa9bedb0491b36c8a27e8d842c05de00bc3b5be76/data' : 'community.wave.seqera.io/library/ucsc-bedgraphtobigwig:469--1db18e1b19f8e5f1' } } + withName: 'UMICOLLAPSE' { + container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/4c/4c8b7641b14c16d52ea08c1ced6a8be4a686c9502de648228dc406d9642a9572/data' : 'community.wave.seqera.io/library/umicollapse:1.1.0--d406900dbf84ec60' } + } + withName: 'UMITOOLS_DEDUP' { container = { workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ba/ba7c02244236db73a56ff1eb880b16c2dd53679834e1b72bb096e1f633e35644/data' : 'community.wave.seqera.io/library/umi_tools:1.1.5--b2721816f7a92564' } } diff --git a/docs/output.md b/docs/output.md index b47593a05..77b921fb4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -614,6 +614,18 @@ RSeQC documentation: [tin.py](http://rseqc.sourceforge.net/#tin-py) The [Qualimap RNA-seq QC module](http://qualimap.bioinfo.cipf.es/doc_html/analysis.html#rna-seq-qc) is used within this pipeline to assess the overall mapping and coverage relative to gene features. +:::warning +**Known limitation**: Qualimap has a known bug ([#1273](https://github.com/nf-core/rnaseq/issues/1273), [Qualimap issue #81](https://bitbucket.org/kokonech/qualimap/issues/81)) where it may report more reads assigned to genomic features (exons, introns, intergenic regions) than the total number of reads in the BAM file. This can lead to inflated read counts and incorrect genomic origin statistics. + +If accurate read distribution metrics are critical for your analysis, we recommend: + +- Cross-referencing Qualimap results with RSeQC output (particularly `read_distribution.txt`) +- Using `--skip_qualimap` to disable Qualimap and rely on RSeQC for genomic feature distribution +- Validating suspicious results with alternative tools or custom scripts + +This is an upstream tool issue that cannot be fixed at the pipeline level. 
+::: + ![MultiQC - Qualimap gene coverage plot](images/mqc_qualimap_coverage.png) ![MultiQC - Qualimap genomic origin plot](images/mqc_qualimap_features.png) diff --git a/docs/usage.md b/docs/usage.md index 00cac88b2..39706c3ac 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -181,10 +181,6 @@ If you would like to reduce the number of reads used in the analysis, for exampl ## Alignment options -:::note -The `--aligner hisat2` option is not currently supported using ARM architecture ('-profile arm') -::: - By default, the pipeline uses [STAR](https://github.com/alexdobin/STAR) (i.e. `--aligner star_salmon`) to map the raw FastQ reads to the reference genome, project the alignments onto the transcriptome and to perform the downstream BAM-level quantification with [Salmon](https://salmon.readthedocs.io/en/latest/salmon.html). STAR is fast but requires a lot of memory to run, typically around 38GB for the Human GRCh37 reference genome. Both `--aligner star_salmon` and `--aligner star_rsem` use STAR for alignment, so you should use the [HISAT2](https://ccb.jhu.edu/software/hisat2/index.shtml) aligner (i.e. `--aligner hisat2`) if you have memory limitations. You also have the option to pseudoalign and quantify your data directly with [Salmon](https://salmon.readthedocs.io/en/latest/salmon.html) or [Kallisto](https://pachterlab.github.io/kallisto/) by specifying `salmon` or `kallisto` to the `--pseudo_aligner` parameter. The selected pseudoaligner will then be run in addition to the standard alignment workflow defined by `--aligner`, mainly because it allows you to obtain QC metrics with respect to the genomic alignments. However, you can provide the `--skip_alignment` parameter if you would like to run Salmon or Kallisto in isolation. By default, the pipeline will use the genome fasta and gtf file to generate the transcripts fasta file, and then to build the Salmon index. 
You can override these parameters using the `--transcript_fasta` and `--salmon_index` parameters, respectively. @@ -247,13 +243,13 @@ The `--umitools_grouping_method` parameter affects [how similar, but non-identic #### Examples: -| UMI type | Source | Pipeline parameters | -| ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| In read name | [Illumina BCL convert >3.7.5](https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl_convert/bcl-convert-v3-7-5-software-guide-1000000163594-00.pdf) | `--with_umi --skip_umi_extract --umitools_umi_separator ":"` | -| In sequence | [Lexogen QuantSeq® 3’ mRNA-Seq V2 FWD](https://www.lexogen.com/quantseq-3mrna-sequencing) + [UMI Second Strand Synthesis Module](https://faqs.lexogen.com/faq/how-can-i-add-umis-to-my-quantseq-libraries) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P.{6})(?P.{4}).*"` | -| In sequence | [Lexogen CORALL® Total RNA-Seq V1](https://www.lexogen.com/corall-total-rna-seq/)
> _mind [Appendix H](https://www.lexogen.com/wp-content/uploads/2020/04/095UG190V0130_CORALL-Total-RNA-Seq_2020-03-31.pdf) regarding optional trimming_ | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P.{12}).*"`
Optional: `--clip_r2 9 --three_prime_clip_r2 12` | -| In sequence | [Takara Bio SMARTer® Stranded Total RNA-Seq Kit v3](https://www.takarabio.com/documents/User%20Manual/SMARTer%20Stranded%20Total%20RNA/SMARTer%20Stranded%20Total%20RNA-Seq%20Kit%20v3%20-%20Pico%20Input%20Mammalian%20User%20Manual-a_114949.pdf) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern2 "^(?P.{8})(?P.{6}).*"` | -| In sequence | [Watchmaker mRNA Library Prep Kit](https://watchmakergenomics.com/wp-content/uploads/2023/11/M223_mRNA-Library-Prep-Kit-_UG_WMUG214_v1-1-0823.pdf) with [Twist UMI Adapter System](https://www.twistbioscience.com/sites/default/files/resources/2023-03/DOC-001337_TechNote-ProcessingSequencingDataUtilizingUMI-REV1-singles.pdf) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P.{5})(?P.{2}).*" --umitools_bc_pattern2 "^(?P.{5})(?P.{2}).*"` | +| UMI type | Source | Pipeline parameters | +| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| In read name | [Illumina BCL convert >3.7.5](https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl_convert/bcl-convert-v3-7-5-software-guide-1000000163594-00.pdf) | `--with_umi --skip_umi_extract --umitools_umi_separator ":"` | +| In sequence | [Lexogen QuantSeq® 3’ mRNA-Seq V2 FWD](https://www.lexogen.com/quantseq-3mrna-sequencing) + [UMI Second Strand 
Synthesis Module](https://faqs.lexogen.com/faq/how-can-i-add-umis-to-my-quantseq-libraries) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P.{6})(?P.{4}).*"` | +| In sequence | [Lexogen CORALL® Total RNA-Seq V1](https://www.lexogen.com/corall-total-rna-seq/)
> _mind [Appendix H](https://www.lexogen.com/wp-content/uploads/2020/04/095UG190V0130_CORALL-Total-RNA-Seq_2020-03-31.pdf) regarding optional trimming_ | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P.{12}).*"`
Optional: `--clip_r2 9 --three_prime_clip_r2 12` | +| In sequence | Takara Bio [SMART-Seq Total RNA Pico Input with UMIs](https://www.takarabio.com/documents/User%20Manual/SMART/SMART-Seq%20Total%20RNA%20Pico%20Input%20with%20UMIs%20%28ZapR%20Mammalian%29%20User%20Manual.pdf) and [SMARTer® Stranded Total RNA-Seq Kit v3](https://www.takarabio.com/documents/User%20Manual/SMARTer%20Stranded%20Total%20RNA/SMARTer%20Stranded%20Total%20RNA-Seq%20Kit%20v3%20-%20Pico%20Input%20Mammalian%20User%20Manual-a_114950.pdf) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern2 "^(?P.{8})(?P.{6}).*"` | +| In sequence | [Watchmaker mRNA Library Prep Kit](https://watchmakergenomics.com/wp-content/uploads/2023/11/M223_mRNA-Library-Prep-Kit-_UG_WMUG214_v1-1-0823.pdf) with [Twist UMI Adapter System](https://www.twistbioscience.com/sites/default/files/resources/2023-03/DOC-001337_TechNote-ProcessingSequencingDataUtilizingUMI-REV1-singles.pdf) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P.{5})(?P.{2}).*" --umitools_bc_pattern2 "^(?P.{5})(?P.{2}).*"` | > _No warranty for the accuracy or completeness of the parameters is implied_ @@ -316,7 +312,7 @@ Notes: - If `--gff` is provided as input then this will be converted to a GTF file, or the latter will be used if both are provided. - If `--gene_bed` is not provided then it will be generated from the GTF file. -- If `--additional_fasta` is provided then the features in this file (e.g. ERCC spike-ins) will be automatically concatenated onto both the reference FASTA file as well as the GTF annotation before building the appropriate indices. +- If `--additional_fasta` is provided then the features in this file (e.g. ERCC spike-ins) will be automatically concatenated onto both the reference FASTA file as well as the GTF annotation before building the appropriate indices. 
Note: if you need the pipeline to build a pseudo-aligner index (Salmon/Kallisto), `--additional_fasta` cannot be used together with `--transcript_fasta` because the pipeline cannot append additional sequences to a user-provided transcriptome. Either omit `--transcript_fasta` and let the pipeline generate it, or provide a pre-built index that already contains the spike-ins. - When using `--aligner star_rsem`, the pipeline will build separate STAR and RSEM indices. STAR performs alignment with RSEM-compatible parameters, then RSEM quantifies from the resulting BAM files using `--alignments` mode. - If the `--skip_alignment` option is used along with `--transcript_fasta`, the pipeline can technically run without providing the genomic FASTA (`--fasta`). However, this approach is **not recommended** with `--pseudo_aligner salmon`, as any dynamically generated Salmon index will lack decoys. To ensure optimal indexing with decoys, it is **highly recommended** to include the genomic FASTA (`--fasta`) with Salmon, unless a pre-existing decoy-aware Salmon index is supplied. For more details on the benefits of decoy-aware indexing, refer to the [Salmon documentation](https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode). @@ -350,6 +346,10 @@ In addition to the reference genome sequence and annotation, you can provide a r We recommend not providing a transcriptome FASTA file and instead allowing the pipeline to create it from the provided genome and annotation. Similar to aligner indexes, you can save the created transcriptome FASTA and BED files to a central location for future pipeline runs. This helps avoid redundant computation and having multiple copies on your system. Ensure that all genome, annotation, transcriptome, and index versions match to maintain consistency. +:::warning +If you are using `--additional_fasta` to add spike-in sequences (e.g. 
ERCC) and need the pipeline to build a pseudo-aligner index (Salmon/Kallisto), you **must not** provide `--transcript_fasta`. The pipeline needs to generate the transcriptome itself so that it includes the spike-in sequences. This combination will cause the pipeline to exit with an error unless you also provide a pre-built index (`--salmon_index` or `--kallisto_index`) that already contains the spike-in sequences. +::: + #### Indices By default, indices are generated dynamically by the workflow for tools such as STAR and Salmon. Since indexing is an expensive process in time and resources you should ensure that it is only done once, by retaining the indices generated from each batch of reference files by specifying `--save_reference`. @@ -362,7 +362,7 @@ Remember to note the genome and annotation versions as well as the versions of t If you are using [GENCODE](https://www.gencodegenes.org/) reference genome files please specify the `--gencode` parameter because the format of these files is slightly different to ENSEMBL genome files: -- The `--gtf_group_features_type` parameter will automatically be set to `gene_type` as opposed to `gene_biotype`, respectively. +- The `--featurecounts_group_type` parameter will automatically be set to `gene_type` as opposed to `gene_biotype`, respectively. - If you are running Salmon, the `--gencode` flag will also be passed to the index building step to overcome parsing issues resulting from the transcript IDs in GENCODE fasta files being separated by vertical pipes (`|`) instead of spaces (see [this issue](https://github.com/COMBINE-lab/salmon/issues/15)). As well as the standard annotations, GENCODE also provides "basic" annotations, which include only representative transcripts, but we do not recommend using these. 
@@ -378,6 +378,23 @@ This pipeline uses featureCounts to generate QC metrics based on [biotype](http: Please get in touch with us on the #rnaseq channel in the [nf-core Slack workspace](https://nf-co.re/join) if you are having problems or need any advice. +#### Large chromosomes (plant genomes) + +Genomes with very large chromosomes (>500 Mb), such as plant genomes, may encounter failures in the RSeQC `inner_distance` module due to a known limitation in the underlying bx-python library. The bx-python BitSet implementation has a maximum capacity of approximately 537 million bases, which can be exceeded by chromosomes in organisms like wheat, barley, and other plants. + +If you encounter an error message similar to `IndexError: [coordinate] is larger than the size of this BitSet (536870912)`, you can work around this by excluding the `inner_distance` module from RSeQC analysis: + +```bash +--rseqc_modules 'bam_stat,infer_experiment,junction_annotation,junction_saturation,read_distribution,read_duplication' +``` + +This removes `inner_distance` from the default list of RSeQC modules while retaining all other quality control metrics. Note that the inner_distance metric is only relevant for paired-end data and provides information about fragment size distribution. + +For more information, see the upstream issues: + +- [nf-core/rnaseq#608](https://github.com/nf-core/rnaseq/issues/608) +- [bxlab/bx-python#67](https://github.com/bxlab/bx-python/issues/67) + ### iGenomes (not recommended) If the `--genome` parameter is provided (e.g. `--genome GRCh37`) then the FASTA and GTF files (and existing indices) will be automatically obtained from AWS-iGenomes unless these have already been downloaded locally in the path specified by `--igenomes_base`. 
@@ -397,10 +414,6 @@ By default, the input GTF file will be filtered to ensure that sequence names co ## Contamination screening options -:::note -The `--contaminant_screening` option is not currently available using ARM architecture ('-profile arm') -::: - The pipeline provides the option to scan unaligned reads for contamination from other species using [Kraken2](https://ccb.jhu.edu/software/kraken2/), with the possibility of applying corrections from [Bracken](https://ccb.jhu.edu/software/bracken/). Since running Bracken is not computationally expensive, we recommend always using it to refine the abundance estimates generated by Kraken2. It is important to note that the accuracy of Kraken2 is [highly dependent on the database](https://doi.org/10.1099/mgen.0.000949) used. Specifically, it is [crucial](https://doi.org/10.1128/mbio.01607-23) to ensure that the host genome is included in the database. If you are particularly concerned about certain contaminants, it may be beneficial to use a smaller, more focused database containing primarily those contaminants instead of the full standard database. Various pre-built databases [are available for download](https://benlangmead.github.io/aws-indexes/k2), and instructions for building a custom database can be found in the [Kraken2 documentation](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown). Additionally, genomes of contaminants detected in previous sequencing experiments are available on the [OpenContami website](https://openlooper.hgc.jp/opencontami/help/help_oct.php). @@ -479,7 +492,7 @@ You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-c ### Running on Linux ARM architectures :::warning -Please note that the ARM profile is experimental. It is expected to function correctly in all cases unless explicitly indicated otherwise—currently, exceptions include the use of the hisat2 aligner and contaminant screening via kraken2. 
However, because testing is presently conducted manually, we cannot guarantee its reliability. +Please note that the ARM profile is experimental. However, because testing is presently conducted manually, we cannot guarantee its reliability. ::: The pipeline can be executed in an ARM compatible mode by specifying the ARM profile, for example: @@ -491,7 +504,7 @@ nextflow run \ --outdir \ --gtf \ --fasta \ - -profile docker,arm + -profile docker,arm64 ``` This will use ARM-compatible containers, and apply a small number of overrides to Conda definitions to support ARM operations. @@ -550,15 +563,15 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `shifter` - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) + - A generic configuration profile to be used with [Charliecloud](https://charliecloud.io/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `wave` - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. -- `arm` - - A configuration profile that will set `docker.runOptions` appropriately for ARM architectures, and apply overrides supplying ARM-compatible containers and Conda environments. See [Running on Linux ARM architectures](#running-on-linux-arm-architectures). +- `arm64` + - A configuration profile that applies overrides supplying ARM-compatible containers and Conda environments. 
See [Running on Linux ARM architectures](#running-on-linux-arm-architectures). ### `-resume` diff --git a/main.nf b/main.nf index 3fea7e29b..52ae6be08 100755 --- a/main.nf +++ b/main.nf @@ -147,7 +147,11 @@ workflow { params.validate_params, params.monochrome_logs, args, - params.outdir + params.outdir, + params.input, + params.help, + params.help_full, + params.show_hidden ) // diff --git a/modules.json b/modules.json index aa20e9320..87d4bd1fa 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "bbmap/bbsplit": { "branch": "master", - "git_sha": "8864afe586537bf562eac7b83349c26207f3cb4d", + "git_sha": "6da7216c83d9d885bdeb7aef1bcb9b51a90f370b", "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"] }, "bedtools/genomecov": { @@ -37,7 +37,7 @@ }, "custom/tx2gene": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "e0bdf8637721e27792a38c6b622f0a2345f3cbc9", "installed_by": ["modules", "quantify_pseudo_alignment"] }, "dupradar": { @@ -269,7 +269,7 @@ }, "tximeta/tximport": { "branch": "master", - "git_sha": "1f008221e451e7a4738226c49e69aaa2eb731369", + "git_sha": "d205ebc03abc530a984d844ab57373f566967ac8", "installed_by": ["modules", "quantify_pseudo_alignment"] }, "ucsc/bedclip": { @@ -391,17 +391,17 @@ }, "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", + "git_sha": "271e7fc14eb1320364416d996fb077421f3faed2", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "git_sha": "4b406a74dc0449c0401ed87d5bfff4252fd277fd", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/bbmap/bbsplit/main.nf b/modules/nf-core/bbmap/bbsplit/main.nf index 
da4e0a1b2..c755bb718 100644 --- a/modules/nf-core/bbmap/bbsplit/main.nf +++ b/modules/nf-core/bbmap/bbsplit/main.nf @@ -10,13 +10,13 @@ process BBMAP_BBSPLIT { input: tuple val(meta), path(reads) - path index + path index, name: 'input_index' path primary_ref tuple val(other_ref_names), path(other_ref_paths) val only_build_index output: - path "bbsplit" , optional:true, emit: index + path "bbsplit_index" , optional:true, emit: index tuple val(meta), path('*primary*fastq.gz'), optional:true, emit: primary_fastq tuple val(meta), path('*fastq.gz') , optional:true, emit: all_fastq tuple val(meta), path('*txt') , optional:true, emit: stats @@ -46,16 +46,17 @@ process BBMAP_BBSPLIT { def fastq_out='' def index_files='' def refstats_cmd='' + def use_index = index ? true : false if (only_build_index) { if (primary_ref && other_ref_names && other_ref_paths) { - index_files = 'ref_primary=' +primary_ref + ' ' + other_refs.join(' ') + ' path=bbsplit' + index_files = 'ref_primary=' +primary_ref + ' ' + other_refs.join(' ') + ' path=bbsplit_build' } else { log.error 'ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files.' } } else { if (index) { - index_files = "path=$index" + index_files = "path=index_writable" } else if (primary_ref && other_ref_names && other_ref_paths) { index_files = "ref_primary=${primary_ref} ${other_refs.join(' ')}" } else { @@ -67,15 +68,19 @@ process BBMAP_BBSPLIT { } """ - # When we stage in the index files the time stamps get disturbed, which - # bbsplit doesn't like. Fix the time stamps in its summaries. This needs to - # be done via Java to match what bbmap does - - if [ $index ]; then - for summary_file in \$(find $index/ref/genome -name summary.txt); do - src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') + # If using a pre-built index, create writable structure: symlink all files except + # summary.txt (which we copy to modify). 
When we stage in the index files the time + # stamps get disturbed, which bbsplit doesn't like. Fix the time stamps in summaries. + if [ "$use_index" == "true" ]; then + find input_index/ref -type f | while read -r f; do + target="index_writable/\${f#input_index/}" + mkdir -p "\$(dirname "\$target")" + [[ \$(basename "\$f") == "summary.txt" ]] && cp "\$f" "\$target" || ln -s "\$(realpath "\$f")" "\$target" + done + find index_writable/ref/genome -name summary.txt | while read -r summary_file; do + src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/ref/|index_writable/ref/|') mod=\$(echo "System.out.println(java.nio.file.Files.getLastModifiedTime(java.nio.file.Paths.get(\\"\$src\\")).toMillis());" | jshell -J-Djdk.lang.Process.launchMechanism=vfork -) - sed "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} + sed -e 's|bbsplit_index/ref|index_writable/ref|' -e "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} done fi @@ -91,12 +96,13 @@ process BBMAP_BBSPLIT { $args 2>| >(tee ${prefix}.log >&2) # Summary files will have an absolute path that will make the index - # impossible to use in other processes- we can fix that - - for summary_file in \$(find bbsplit/ref/genome -name summary.txt); do - src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') - sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} - done + # impossible to use in other processes - fix paths and rename atomically + if [ -d bbsplit_build/ref/genome ]; then + find bbsplit_build/ref/genome -name summary.txt | while read -r summary_file; do + sed "s|^source.*|source\\t\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit_build|bbsplit_index|')|" "\$summary_file" > \${summary_file}.tmp && mv 
\${summary_file}.tmp \${summary_file} + done + mv bbsplit_build bbsplit_index + fi cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -110,11 +116,14 @@ process BBMAP_BBSPLIT { other_ref_names.eachWithIndex { name, index -> other_refs += "echo '' | gzip > ${prefix}_${name}.fastq.gz" } + def will_build_index = only_build_index || (!index && primary_ref && other_ref_names && other_ref_paths) """ - if [ ! -d bbsplit ]; then - mkdir bbsplit + # Create index directory if building an index (either only_build_index or on-the-fly) + if [ "${will_build_index}" == "true" ]; then + mkdir -p bbsplit_index fi + # Only create output files if splitting (not just building index) if ! (${only_build_index}); then echo '' | gzip > ${prefix}_primary.fastq.gz ${other_refs} diff --git a/modules/nf-core/bbmap/bbsplit/meta.yml b/modules/nf-core/bbmap/bbsplit/meta.yml index 081aeb2b3..66f87cce0 100644 --- a/modules/nf-core/bbmap/bbsplit/meta.yml +++ b/modules/nf-core/bbmap/bbsplit/meta.yml @@ -25,31 +25,33 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. 
- - - index: - type: directory - description: Directory to place generated index - pattern: "*" - - - primary_ref: - type: file - description: Path to the primary reference - pattern: "*" + ontologies: [] + - index: + type: directory + description: Directory to place generated index + pattern: "*" + - primary_ref: + type: file + description: Path to the primary reference + pattern: "*" + ontologies: [] - - other_ref_names: type: list description: List of other reference ids apart from the primary - other_ref_paths: type: list description: Path to other references paths corresponding to "other_ref_names" - - - only_build_index: - type: string - description: true = only build index; false = mapping + - only_build_index: + type: string + description: true = only build index; false = mapping output: - - index: - - bbsplit: - type: directory - description: Directory with index files - pattern: "bbsplit" - - primary_fastq: - - meta: + index: + - bbsplit_index: + type: directory + description: Directory with index files + pattern: "bbsplit_index" + primary_fastq: + - - meta: type: map description: | Groovy Map containing sample information @@ -58,8 +60,10 @@ output: type: file description: Output reads that map to the primary reference pattern: "*primary*fastq.gz" - - all_fastq: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + all_fastq: + - - meta: type: map description: | Groovy Map containing sample information @@ -68,8 +72,10 @@ output: type: file description: All reads mapping to any of the references pattern: "*fastq.gz" - - stats: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + stats: + - - meta: type: map description: | Groovy Map containing sample information @@ -78,8 +84,9 @@ output: type: file description: Tab-delimited text file containing mapping statistics pattern: "*.txt" - - log: - - meta: + ontologies: [] + log: + - - meta: type: map description: | Groovy Map containing sample 
information @@ -88,14 +95,19 @@ output: type: file description: Log file pattern: "*.log" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@joseespinosa" - "@drpatelh" + - "@pinin4fjords" maintainers: - "@joseespinosa" - "@drpatelh" + - "@pinin4fjords" diff --git a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test index 0674d247f..2f942f39c 100644 --- a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test +++ b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test @@ -3,8 +3,12 @@ nextflow_process { name "Test Process BBMAP_BBSPLIT" script "../main.nf" process "BBMAP_BBSPLIT" + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/bbsplit" - test("sarscov2_se_fastq_fasta_chr22_fasta - index") { + test("sarscov2_se_fastq_fasta_chr22_fasta - build index") { when { process { @@ -31,7 +35,7 @@ nextflow_process { } } - test("sarscov2_se_fastq_fasta_chr22_fasta - index - stub") { + test("sarscov2_se_fastq_fasta_chr22_fasta - build index - stub") { options "-stub" @@ -52,13 +56,12 @@ nextflow_process { then { assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert process.success } ) } } - test("sarscov2_se_fastq_fasta_chr22_fasta") { + test("sarscov2_se_fastq_fasta_chr22_fasta - split with prebuilt index") { setup { @@ -87,58 +90,27 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) input[1] = BBMAP_BBSPLIT_INDEX.out.index - input[2] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)) - input[3] = Channel.of([ - [ 'human' ], // meta map - 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', checkIfExists: true) - ]) + input[2] = [] + input[3] = [[], []] input[4] = false """ } } then { - def filesToExclude = [ - "info.txt", - "reflist.txt", - "scaffolds.txt.gz", - "summary.txt" - ] - - def outputFiles = [] - def outputDirectory = new File(process.out.index[0]) - outputDirectory.eachFileRecurse { file -> - if (file.isFile()){ - outputFiles << file - } - } - - def filesExist = filesToExclude.every { excludeName -> - outputFiles.any { file -> - file.getName().endsWith(excludeName) && file.exists() - } - } - - def filteredFiles = outputFiles - .findAll { file -> - !filesToExclude.any { excludeName -> - file.getName().endsWith(excludeName) - } - }.sort{ it.getName()} - assertAll( { assert process.success }, { assert path(process.out.log[0][1]).text.contains("If you wish to regenerate the index") }, - { assert filesExist : "One or more files to exclude do not exist" }, { assert snapshot( - filteredFiles, + process.out.primary_fastq, + process.out.stats, process.out.versions ).match()} ) } } - test("sarscov2_se_fastq_fasta_chr22_fasta - stub") { + test("sarscov2_se_fastq_fasta_chr22_fasta - split with prebuilt index - stub") { options "-stub" @@ -169,11 +141,8 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) input[1] = BBMAP_BBSPLIT_INDEX.out.index - input[2] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)) - input[3] = Channel.of([ - [ 'human' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', checkIfExists: true) - ]) + input[2] = [] + input[3] = [[], []] input[4] = false """ } @@ -181,8 +150,7 @@ nextflow_process { then { assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { 
assert process.success } ) } } diff --git a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap index 656d3ed05..7e739a663 100644 --- a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap +++ b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap @@ -1,12 +1,23 @@ { - "sarscov2_se_fastq_fasta_chr22_fasta": { + "sarscov2_se_fastq_fasta_chr22_fasta - split with prebuilt index": { "content": [ [ - "chr1.chrom.gz:md5,8fec4c63ec642613ad10adf4cc2e6ade", - "chr1_index_k13_c13_b1.block:md5,385913c1e84b77dc7bf36288ee1c8706", - "chr1_index_k13_c13_b1.block2.gz:md5,2556b45206835a0ff7078d683b5fd6e2", - "merged_ref_9222711925172838098.fa.gz:md5,983cef447fb28394b88a5b49b3579f0c", - "namelist.txt:md5,45e7a4cdc7a11a39ada56844ca3a1e30" + [ + { + "id": "test", + "single_end": true + }, + "test_primary.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats.txt:md5,2cbf69b72e5f4f8508306b54e8fe2861" + ] ], [ "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" @@ -14,11 +25,11 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.10.0" }, - "timestamp": "2025-03-12T18:57:32.958846" + "timestamp": "2025-11-07T12:36:23.511852" }, - "sarscov2_se_fastq_fasta_chr22_fasta - index": { + "sarscov2_se_fastq_fasta_chr22_fasta - build index": { "content": [ [ "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" @@ -26,174 +37,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-03-12T18:57:12.155636" - }, - "sarscov2_se_fastq_fasta_chr22_fasta - index - stub": { - "content": [ - { - "0": [ - [ - - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - [ - { - - }, - "null.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "5": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ], - "all_fastq": [ - - ], - "index": [ - [ - - ] - ], - "log": [ - [ - { - - }, - 
"null.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "primary_fastq": [ - - ], - "stats": [ - - ], - "versions": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-03-12T18:57:17.177351" - }, - "sarscov2_se_fastq_fasta_chr22_fasta - stub": { - "content": [ - { - "0": [ - [ - - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": true - }, - [ - "test_human.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": true - }, - "test.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "4": [ - [ - { - "id": "test", - "single_end": true - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "5": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ], - "all_fastq": [ - [ - { - "id": "test", - "single_end": true - }, - [ - "test_human.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "index": [ - [ - - ] - ], - "log": [ - [ - { - "id": "test", - "single_end": true - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "primary_fastq": [ - [ - { - "id": "test", - "single_end": true - }, - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "stats": [ - [ - { - "id": "test", - "single_end": true - }, - "test.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.10.0" }, - "timestamp": "2025-03-12T18:57:42.233549" + "timestamp": "2025-11-07T11:05:45.517832" } } \ No newline at end of file diff --git 
a/modules/nf-core/custom/tx2gene/meta.yml b/modules/nf-core/custom/tx2gene/meta.yml index 8254afa08..07b1f0f4b 100644 --- a/modules/nf-core/custom/tx2gene/meta.yml +++ b/modules/nf-core/custom/tx2gene/meta.yml @@ -26,26 +26,27 @@ input: type: file description: An annotation file of the reference genome in GTF format pattern: "*.gtf" + ontologies: [] - - meta2: type: map description: | Groovy Map containing information related to the experiment as a whole e.g. `[ id:'SRP123456' ]` - - '"quants/*"': + - quants/*: type: file description: quants file - - - quant_type: - type: string - description: Quantification type, 'kallisto' or 'salmon' - - - id: - type: string - description: Gene ID attribute in the GTF file (default= gene_id) - - - extra: - type: string - description: Extra gene attribute in the GTF file (default= gene_name) + - quant_type: + type: string + description: Quantification type, 'kallisto' or 'salmon' + - id: + type: string + description: Gene ID attribute in the GTF file (default= gene_id) + - extra: + type: string + description: Extra gene attribute(s) in the GTF file, comma-separated for multiple (default= gene_name) output: - - tx2gene: - - meta: + tx2gene: + - - meta: type: map description: | Groovy Map containing reference information related to the GTF file @@ -54,11 +55,15 @@ output: type: file description: A transcript/ gene mapping table in TSV format pattern: "*.tx2gene.tsv" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@pinin4fjords" maintainers: diff --git a/modules/nf-core/custom/tx2gene/templates/tx2gene.py b/modules/nf-core/custom/tx2gene/templates/tx2gene.py index 4d769513d..a90c63e39 100755 --- 
a/modules/nf-core/custom/tx2gene/templates/tx2gene.py +++ b/modules/nf-core/custom/tx2gene/templates/tx2gene.py @@ -128,7 +128,7 @@ def map_transcripts_to_gene( gtf_file: str, quant_dir: str, gene_id: str, - extra_id_field: str, + extra_id_fields: str, output_file: str, ) -> bool: """ @@ -139,7 +139,7 @@ def map_transcripts_to_gene( gtf_file (str): Path to the GTF file. quant_dir (str): Directory where quantification files are located. gene_id (str): The gene ID attribute in the GTF file. - extra_id_field (str): Additional ID field in the GTF file. + extra_id_fields (str): Additional ID field(s) in the GTF file, comma-separated for multiple. output_file (str): The output file path. Returns: @@ -150,12 +150,17 @@ def map_transcripts_to_gene( # Discover the attribute that corresponds to transcripts in the GTF transcript_attribute = discover_transcript_attribute(gtf_file, transcripts) + # Parse comma-separated extra ID fields + extra_fields = [field.strip() for field in extra_id_fields.split(",")] + # Open GTF and output file to write the mappings # Initialize the set to track seen combinations seen = set() with open(gtf_file) as inh, open(output_file, "w") as output_handle: - output_handle.write(f"{transcript_attribute}\\t{gene_id}\\t{extra_id_field}\\n") + # Write header with all extra fields as separate columns + header_fields = [transcript_attribute, gene_id] + extra_fields + output_handle.write("\\t".join(header_fields) + "\\n") # Parse each line of the GTF, mapping transcripts to genes for line in filter(lambda x: not x.startswith("#"), inh): cols = line.split("\\t") @@ -170,8 +175,10 @@ def map_transcripts_to_gene( # Check if the combination has already been seen if transcript_gene_pair not in seen: # If it's a new combination, write it to the output and add to the seen set - extra_id = attr_dict.get(extra_id_field, attr_dict[gene_id]) - output_handle.write(f"{attr_dict[transcript_attribute]}\\t{attr_dict[gene_id]}\\t{extra_id}\\n") + # Extract values for all 
extra fields, falling back to gene_id if not present + extra_values = [attr_dict.get(field, attr_dict[gene_id]) for field in extra_fields] + output_fields = [attr_dict[transcript_attribute], attr_dict[gene_id]] + extra_values + output_handle.write("\\t".join(output_fields) + "\\n") seen.add(transcript_gene_pair) return True diff --git a/modules/nf-core/custom/tx2gene/tests/main.nf.test b/modules/nf-core/custom/tx2gene/tests/main.nf.test index 2d45b7646..49518f797 100644 --- a/modules/nf-core/custom/tx2gene/tests/main.nf.test +++ b/modules/nf-core/custom/tx2gene/tests/main.nf.test @@ -4,6 +4,11 @@ nextflow_process { script "../main.nf" process "CUSTOM_TX2GENE" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/tx2gene" + tag "untar" test("saccharomyces_cerevisiae - gtf") { @@ -44,6 +49,45 @@ nextflow_process { } } + test("saccharomyces_cerevisiae - gtf - multiple extra attributes") { + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/saccharomyces_cerevisiae/kallisto_results.tar.gz', checkIfExists: true) + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/saccharomyces_cerevisiae/genome_gfp.gtf', checkIfExists: true) + ]) + input[1] = UNTAR.out.untar.map { meta, dir -> [ meta, dir.listFiles().collect() ] } + input[2] = 'kallisto' + input[3] = 'gene_id' + input[4] = 'gene_name,gene_biotype' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("saccharomyces_cerevisiae - gtf - stub") { options "-stub" @@ -84,4 +128,4 @@ nextflow_process { ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/custom/tx2gene/tests/main.nf.test.snap b/modules/nf-core/custom/tx2gene/tests/main.nf.test.snap index 
2be5fe547..63f319e90 100644 --- a/modules/nf-core/custom/tx2gene/tests/main.nf.test.snap +++ b/modules/nf-core/custom/tx2gene/tests/main.nf.test.snap @@ -32,6 +32,39 @@ }, "timestamp": "2024-10-18T10:24:12.19104487" }, + "saccharomyces_cerevisiae - gtf - multiple extra attributes": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tx2gene.tsv:md5,97223927dc2e0dae6c38bad96aaa6f49" + ] + ], + "1": [ + "versions.yml:md5,e504b95d76ef4cf65ba0b38cddce2840" + ], + "tx2gene": [ + [ + { + "id": "test" + }, + "test.tx2gene.tsv:md5,97223927dc2e0dae6c38bad96aaa6f49" + ] + ], + "versions": [ + "versions.yml:md5,e504b95d76ef4cf65ba0b38cddce2840" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-25T19:28:57.610922" + }, "saccharomyces_cerevisiae - gtf - stub": { "content": [ { diff --git a/modules/nf-core/qualimap/rnaseq/nextflow.config b/modules/nf-core/qualimap/rnaseq/nextflow.config index 2d3756d58..5c1b59423 100644 --- a/modules/nf-core/qualimap/rnaseq/nextflow.config +++ b/modules/nf-core/qualimap/rnaseq/nextflow.config @@ -1,7 +1,16 @@ if (!params.skip_qc) { if (!params.skip_qualimap) { process { + withName: 'SAMTOOLS_SORT_QUALIMAP' { + ext.args = '-n' + ext.prefix = { "${meta.id}.namesorted" } + publishDir = [ + enabled: false + ] + } + withName: 'QUALIMAP_RNASEQ' { + ext.args = '--sorted' publishDir = [ path: { "${params.outdir}/${params.aligner}/qualimap" }, mode: params.publish_dir_mode, diff --git a/modules/nf-core/tximeta/tximport/meta.yml b/modules/nf-core/tximeta/tximport/meta.yml index d4c6a5492..b8ab9bca8 100644 --- a/modules/nf-core/tximeta/tximport/meta.yml +++ b/modules/nf-core/tximeta/tximport/meta.yml @@ -25,9 +25,7 @@ input: description: | Groovy Map containing information related to the experiment as a whole e.g. 
`[ id:'SRP123456' ]` - - '"quants/*"': - type: directory - description: Directory containing quantification files + - quants/*: {} - - meta2: type: map description: | @@ -37,12 +35,15 @@ input: type: file description: A transcript to gene mapping table such as those generated by custom/tx2gene pattern: "*.{csv,tsv}" - - - quant_type: - type: string - description: Quantification type, 'kallisto' or 'salmon' + ontologies: + - edam: http://edamontology.org/format_3752 # CSV + - edam: http://edamontology.org/format_3475 # TSV + - quant_type: + type: string + description: Quantification type, 'kallisto' or 'salmon' output: - - tpm_gene: - - meta: + tpm_gene: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -53,8 +54,10 @@ output: Abundance (TPM) values derived from tximport output after summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*gene_tpm.tsv" - - counts_gene: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + counts_gene: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -65,8 +68,10 @@ output: Count values derived from tximport output after summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*gene_counts.tsv" - - counts_gene_length_scaled: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + counts_gene_length_scaled: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -77,8 +82,10 @@ output: Count values derived from tximport output after summarizeToGene(), with a 'countsFromAbundance' specification of 'lengthScaledTPM' pattern: "*gene_counts_length_scaled.tsv" - - counts_gene_scaled: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + counts_gene_scaled: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a 
whole @@ -89,8 +96,10 @@ output: Count values derived from tximport output after summarizeToGene(), with a 'countsFromAbundance' specification of 'scaledTPM' pattern: "*gene_counts_scaled.tsv" - - lengths_gene: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + lengths_gene: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -101,8 +110,10 @@ output: Length values derived from tximport output after summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*gene_lengths.tsv" - - tpm_transcript: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + tpm_transcript: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -113,8 +124,10 @@ output: Abundance (TPM) values derived from tximport output without summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*transcript_tpm.tsv" - - counts_transcript: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + counts_transcript: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -125,8 +138,10 @@ output: Count values derived from tximport output without summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*transcript_counts.tsv" - - lengths_transcript: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + lengths_transcript: + - - meta: type: map description: | Groovy Map containing information related to the experiment as a whole @@ -137,11 +152,15 @@ output: Length values derived from tximport output without summarizeToGene(), without a 'countsFromAbundance' specification pattern: "*gene_lengths.tsv" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: 
+ - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@pinin4fjords" maintainers: diff --git a/modules/nf-core/tximeta/tximport/templates/tximport.r b/modules/nf-core/tximeta/tximport/templates/tximport.r index 5986c05d9..883935129 100755 --- a/modules/nf-core/tximeta/tximport/templates/tximport.r +++ b/modules/nf-core/tximeta/tximport/templates/tximport.r @@ -73,10 +73,10 @@ read_transcript_info <- function(tinfo_path){ } transcript_info <- read.csv(tinfo_path, sep="\t", header = TRUE, - col.names = c("tx", "gene_id", "gene_name")) + col.names = c("tx", "gene_id", "gene_name"), check.names = FALSE) extra <- setdiff(rownames(txi[[1]]), as.character(transcript_info[["tx"]])) - transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra)) + transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra, check.names = FALSE)) transcript_info <- transcript_info[match(rownames(txi[[1]]), transcript_info[["tx"]]), ] rownames(transcript_info) <- transcript_info[["tx"]] @@ -131,7 +131,7 @@ txi <- tximport(fns, type = '$quant_type', txOut = TRUE, dropInfReps = dropInfRe transcript_info <- read_transcript_info('$tx2gene') # Make coldata just to appease the summarizedexperiment -coldata <- data.frame(files = fns, names = names) +coldata <- data.frame(files = fns, names = names, check.names = FALSE) rownames(coldata) <- coldata[["names"]] # Create initial SummarizedExperiment object diff --git a/nextflow.config b/nextflow.config index b49c8e9db..1855f03b2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -121,13 +121,13 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false - hook_url = null + hook_url = System.getenv('HOOK_URL') help = false help_full = false show_hidden = false version = false - trace_report_suffix = new java.util.Date().format( 
'yyyy-MM-dd_HH-mm-ss')// Config options pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/7f1614baeb0ddf66e60be78c3d9fa55440465ac8/' + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') // Config options config_profile_name = null @@ -191,10 +191,14 @@ profiles { apptainer.enabled = false docker.runOptions = '-u $(id -u):$(id -g)' } - arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + arm64 { + process.arch = 'arm64' includeConfig 'conf/arm.config' } + emulate_amd64 { + // Run AMD64 containers on ARM hardware using emulation (slower but more compatible) + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } singularity { singularity.enabled = true singularity.autoMounts = true @@ -249,18 +253,6 @@ profiles { wave.freeze = true wave.strategy = 'conda,container' } - gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB - process { - resourceLimits = [ - memory: 8.GB, - cpus : 4, - time : 1.h - ] - } - } gpu { docker.runOptions = '-u $(id -u):$(id -g) --gpus all' apptainer.runOptions = '--nv' @@ -357,7 +349,7 @@ manifest { name: 'Jonathan Manning', affiliation: 'Seqera', github: '@pinin4fjords', - contribution: ['maintainer', 'contributor'], + contribution: ['author', 'maintainer'], orcid: '0000-0002-3483-8456' ], [ @@ -420,46 +412,19 @@ manifest { description = """RNA sequencing analysis pipeline for gene/isoform quantification and extensive quality control.""" mainScript = 'main.nf' defaultBranch = 'master' - nextflowVersion = '!>=24.10.5' - version = '3.21.0' + nextflowVersion = '!>=25.04.0' + version = '3.22.0' doi = 'https://doi.org/10.5281/zenodo.1400710' } // Nextflow plugins plugins { - id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.5.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { 
defaultIgnoreParams = ["genomes"] monochromeLogs = params.monochrome_logs - help { - enabled = true - command = "nextflow run nf-core/rnaseq -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" - showHiddenParameter = "show_hidden" - beforeText = """ --\033[2m----------------------------------------------------\033[0m- - \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m nf-core/rnaseq ${manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/rnaseq/blob/master/CITATIONS.md -""" - } - summary { - beforeText = validation.help.beforeText - afterText = validation.help.afterText - } } // Load DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index d2bc59295..852b5bdd4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -111,7 +111,8 @@ "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "fa_icon": "far fa-file-code", - "description": "Path to FASTA transcriptome file." + "description": "Path to FASTA transcriptome file.", + "help_text": "If not provided, the transcriptome will be generated from the genome FASTA and GTF files. Cannot be used together with `--additional_fasta` when building a pseudo-aligner index, because the pipeline cannot append spike-in sequences to a user-provided transcriptome. Either omit this parameter or provide a pre-built index." 
}, "additional_fasta": { "type": "string", @@ -121,7 +122,7 @@ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "fa_icon": "far fa-file-code", "description": "FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences.", - "help_text": "If provided, sequences in this file will be concatenated to the genome FASTA file. A GTF file will be automatically created using these sequences, and alignment indices will be created from the combined files. Use `--save_reference` to reuse these indices in future runs." + "help_text": "If provided, sequences in this file will be concatenated to the genome FASTA file. A GTF file will be automatically created using these sequences, and alignment indices will be created from the combined files. Use `--save_reference` to reuse these indices in future runs. Cannot be used together with `--transcript_fasta` when building a pseudo-aligner index - either omit `--transcript_fasta` or provide a pre-built index that already contains the spike-ins." }, "splicesites": { "type": "string", @@ -888,6 +889,18 @@ "help_text": "You can use '{date}' as a placeholder which will be replaced with the current date and time in the format 'yyyy-MM-dd_HH-mm-ss'. For example, 'run_{date}' will become 'run_2023-05-15_14-30-45'.", "errorMessage": "The trace report suffix must only contain alphanumeric characters, underscores, hyphens, dots, and curly braces for date placeholders.", "hidden": true + }, + "help": { + "type": ["boolean", "string"], + "description": "Display the help message." + }, + "help_full": { + "type": "boolean", + "description": "Display the full detailed help message." + }, + "show_hidden": { + "type": "boolean", + "description": "Display hidden parameters in the help message (only works when --help or --help_full are provided)." 
} } } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index b9e4f0870..808dcd1a4 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,21 +22,15 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-09-18T10:15:27+00:00", - "description": "

\n \n \n \"nf-core/rnaseq\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/rnaseq/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/rnaseq/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/rnaseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rnaseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnaseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1400710-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1400710)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/rnaseq)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnaseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnaseq)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/rnaseq** is a bioinformatics pipeline that can be used to analyse RNA sequencing data obtained from organisms with a reference genome and annotation. It takes a samplesheet with FASTQ files or pre-aligned BAM files as input, performs quality control (QC), trimming and (pseudo-)alignment, and produces a gene expression matrix and extensive QC report.\n\n![nf-core/rnaseq metro map](docs/images/nf-core-rnaseq_metro_map_grey_animated.svg)\n\n> In case the image above is not loading, please have a look at the [static version](docs/images/nf-core-rnaseq_metro_map_grey.png).\n\n1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html))\n2. Auto-infer strandedness by subsampling and pseudoalignment ([`fq`](https://github.com/stjude-rust-labs/fq), [`Salmon`](https://combine-lab.github.io/salmon/))\n3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n4. UMI extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n5. Adapter and quality trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/))\n6. Removal of genome contaminants ([`BBSplit`](http://seqanswers.com/forums/showthread.php?t=41288))\n7. Removal of ribosomal RNA ([`SortMeRNA`](https://github.com/biocore/sortmerna))\n8. Choice of multiple alignment and quantification routes (_For `STAR` the sentieon implementation can be chosen_):\n 1. [`STAR`](https://github.com/alexdobin/STAR) -> [`Salmon`](https://combine-lab.github.io/salmon/)\n 2. 
[`STAR`](https://github.com/alexdobin/STAR) -> [`RSEM`](https://github.com/deweylab/RSEM)\n 3. [`HiSAT2`](https://ccb.jhu.edu/software/hisat2/index.shtml) -> **NO QUANTIFICATION**\n9. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))\n10. UMI-based deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n11. Duplicate read marking ([`picard MarkDuplicates`](https://broadinstitute.github.io/picard/))\n12. Transcript assembly and quantification ([`StringTie`](https://ccb.jhu.edu/software/stringtie/))\n13. Create bigWig coverage files ([`BEDTools`](https://github.com/arq5x/bedtools2/), [`bedGraphToBigWig`](http://hgdownload.soe.ucsc.edu/admin/exe/))\n14. Extensive quality control:\n 1. [`RSeQC`](http://rseqc.sourceforge.net/)\n 2. [`Qualimap`](http://qualimap.bioinfo.cipf.es/)\n 3. [`dupRadar`](https://bioconductor.org/packages/release/bioc/html/dupRadar.html)\n 4. [`Preseq`](http://smithlabresearch.org/software/preseq/)\n 5. [`DESeq2`](https://bioconductor.org/packages/release/bioc/html/DESeq2.html)\n 6. [`Kraken2`](https://ccb.jhu.edu/software/kraken2/) -> [`Bracken`](https://ccb.jhu.edu/software/bracken/) on unaligned sequences; _optional_\n15. Pseudoalignment and quantification ([`Salmon`](https://combine-lab.github.io/salmon/) or ['Kallisto'](https://pachterlab.github.io/kallisto/); _optional_)\n16. Present QC for raw read, alignment, gene biotype, sample similarity, and strand-specificity checks ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/))\n\n> **Note**\n> The SRA download functionality has been removed from the pipeline (`>=3.2`) and ported to an independent workflow called [nf-core/fetchngs](https://nf-co.re/fetchngs). 
You can provide `--nf_core_pipeline rnaseq` when running nf-core/fetchngs to download and auto-create a samplesheet containing publicly available samples that can be accepted directly as input by this pipeline.\n\n> **Warning**\n> Quantification isn't performed if using `--aligner hisat2` due to the lack of an appropriate option to calculate accurate expression estimates from HISAT2 derived genomic alignments. However, you can use this route if you have a preference for the alignment, QC and other types of downstream analysis compatible with the output of HISAT2.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n**samplesheet.csv**:\n\n```csv\nsample,fastq_1,fastq_2,strandedness\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,auto\nCONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,auto\nCONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,auto\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end). Rows with the same sample identifier are considered technical replicates and merged automatically. The strandedness refers to the library preparation and will be automatically inferred if set to `auto`.\n\nThe pipeline supports a two-step reprocessing workflow using BAM files from previous runs. Run initially with `--save_align_intermeds` to generate a samplesheet with BAM paths, then reprocess using `--skip_alignment` for efficient downstream analysis without repeating expensive alignment steps. 
This feature is designed specifically for pipeline-generated BAMs.\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/rnaseq \\\n --input \\\n --outdir \\\n --gtf \\\n --fasta \\\n -profile \n```\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnaseq/usage) and the [parameter documentation](https://nf-co.re/rnaseq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/rnaseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/rnaseq/output).\n\nThis pipeline quantifies RNA-sequenced reads relative to genes/transcripts in the genome and normalizes the resulting data. It does not compare the samples statistically in order to assign significance in the form of FDR or P-values. 
For downstream analyses, the output files from this pipeline can be analysed directly in statistical environments like [R](https://www.r-project.org/), [Julia](https://julialang.org/) or via the [nf-core/differentialabundance](https://github.com/nf-core/differentialabundance/) pipeline.\n\n## Online videos\n\nA short talk about the history, current status and functionality on offer in this pipeline was given by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) on [8th February 2022](https://nf-co.re/events/2022/bytesize-32-nf-core-rnaseq) as part of the nf-core/bytesize series.\n\nYou can find numerous talks on the [nf-core events page](https://nf-co.re/events) from various topics including writing pipelines/modules in Nextflow DSL2, using nf-core tooling, running nf-core pipelines as well as more generic content like contributing to Github. Please check them out!\n\n## Credits\n\nThese scripts were originally written for use at the [National Genomics Infrastructure](https://ngisweden.scilifelab.se), part of [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden, by Phil Ewels ([@ewels](https://github.com/ewels)) and Rickard Hammar\u00e9n ([@Hammarn](https://github.com/Hammarn)).\n\nThe pipeline was re-written in Nextflow DSL2 and is primarily maintained by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/).\n\nThe pipeline workflow diagram was initially designed by Sarah Guinchard ([@G-Sarah](https://github.com/G-Sarah)) and James Fellows Yates ([@jfy133](https://github.com/jfy133)), further modifications where made by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) and Maxime Garcia ([@maxulysse](https://github.com/maxulysse)).\n\nMany thanks to other who have helped out along the way too, including (but not limited to):\n\n- [Alex Peltzer](https://github.com/apeltzer)\n- [Colin Davenport](https://github.com/colindaven)\n- [Denis Moreno](https://github.com/Galithil)\n- [Edmund 
Miller](https://github.com/edmundmiller)\n- [Gregor Sturm](https://github.com/grst)\n- [Jacki Buros Novik](https://github.com/jburos)\n- [Lorena Pantano](https://github.com/lpantano)\n- [Matthias Zepper](https://github.com/MatthiasZepper)\n- [Maxime Garcia](https://github.com/maxulysse)\n- [Olga Botvinnik](https://github.com/olgabot)\n- [@orzechoj](https://github.com/orzechoj)\n- [Paolo Di Tommaso](https://github.com/pditommaso)\n- [Rob Syme](https://github.com/robsyme)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#rnaseq` channel](https://nfcore.slack.com/channels/rnaseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/rnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.1400710](https://doi.org/10.5281/zenodo.1400710)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-11-26T10:00:00+00:00", + "description": "

\n \n \n \"nf-core/rnaseq\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/rnaseq)\n[![GitHub Actions CI Status](https://github.com/nf-core/rnaseq/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/rnaseq/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/rnaseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rnaseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnaseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1400710-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1400710)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/rnaseq)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23rnaseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnaseq)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/rnaseq** is a bioinformatics pipeline that can be used to analyse RNA sequencing data obtained from organisms with a reference genome and annotation. It takes a samplesheet with FASTQ files or pre-aligned BAM files as input, performs quality control (QC), trimming and (pseudo-)alignment, and produces a gene expression matrix and extensive QC report.\n\n![nf-core/rnaseq metro map](docs/images/nf-core-rnaseq_metro_map_grey_animated.svg)\n\n> In case the image above is not loading, please have a look at the [static version](docs/images/nf-core-rnaseq_metro_map_grey.png).\n\n1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html))\n2. Auto-infer strandedness by subsampling and pseudoalignment ([`fq`](https://github.com/stjude-rust-labs/fq), [`Salmon`](https://combine-lab.github.io/salmon/))\n3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n4. UMI extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n5. Adapter and quality trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/))\n6. Removal of genome contaminants ([`BBSplit`](http://seqanswers.com/forums/showthread.php?t=41288))\n7. Removal of ribosomal RNA ([`SortMeRNA`](https://github.com/biocore/sortmerna))\n8. 
Choice of multiple alignment and quantification routes (_For `STAR` the sentieon implementation can be chosen_):\n 1. [`STAR`](https://github.com/alexdobin/STAR) -> [`Salmon`](https://combine-lab.github.io/salmon/)\n 2. [`STAR`](https://github.com/alexdobin/STAR) -> [`RSEM`](https://github.com/deweylab/RSEM)\n 3. [`HiSAT2`](https://ccb.jhu.edu/software/hisat2/index.shtml) -> **NO QUANTIFICATION**\n9. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))\n10. UMI-based deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n11. Duplicate read marking ([`picard MarkDuplicates`](https://broadinstitute.github.io/picard/))\n12. Transcript assembly and quantification ([`StringTie`](https://ccb.jhu.edu/software/stringtie/))\n13. Create bigWig coverage files ([`BEDTools`](https://github.com/arq5x/bedtools2/), [`bedGraphToBigWig`](http://hgdownload.soe.ucsc.edu/admin/exe/))\n14. Extensive quality control:\n 1. [`RSeQC`](http://rseqc.sourceforge.net/)\n 2. [`Qualimap`](http://qualimap.bioinfo.cipf.es/)\n 3. [`dupRadar`](https://bioconductor.org/packages/release/bioc/html/dupRadar.html)\n 4. [`Preseq`](http://smithlabresearch.org/software/preseq/)\n 5. [`DESeq2`](https://bioconductor.org/packages/release/bioc/html/DESeq2.html)\n 6. [`Kraken2`](https://ccb.jhu.edu/software/kraken2/) -> [`Bracken`](https://ccb.jhu.edu/software/bracken/) on unaligned sequences; _optional_\n15. Pseudoalignment and quantification ([`Salmon`](https://combine-lab.github.io/salmon/) or ['Kallisto'](https://pachterlab.github.io/kallisto/); _optional_)\n16. Present QC for raw read, alignment, gene biotype, sample similarity, and strand-specificity checks ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/))\n\n> **Note**\n> The SRA download functionality has been removed from the pipeline (`>=3.2`) and ported to an independent workflow called [nf-core/fetchngs](https://nf-co.re/fetchngs). 
You can provide `--nf_core_pipeline rnaseq` when running nf-core/fetchngs to download and auto-create a samplesheet containing publicly available samples that can be accepted directly as input by this pipeline.\n\n> **Warning**\n> Quantification isn't performed if using `--aligner hisat2` due to the lack of an appropriate option to calculate accurate expression estimates from HISAT2 derived genomic alignments. However, you can use this route if you have a preference for the alignment, QC and other types of downstream analysis compatible with the output of HISAT2.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n**samplesheet.csv**:\n\n```csv\nsample,fastq_1,fastq_2,strandedness\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,auto\nCONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,auto\nCONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,auto\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end). Rows with the same sample identifier are considered technical replicates and merged automatically. The strandedness refers to the library preparation and will be automatically inferred if set to `auto`.\n\nThe pipeline supports a two-step reprocessing workflow using BAM files from previous runs. Run initially with `--save_align_intermeds` to generate a samplesheet with BAM paths, then reprocess using `--skip_alignment` for efficient downstream analysis without repeating expensive alignment steps. 
This feature is designed specifically for pipeline-generated BAMs.\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/rnaseq \\\n --input \\\n --outdir \\\n --gtf \\\n --fasta \\\n -profile \n```\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnaseq/usage) and the [parameter documentation](https://nf-co.re/rnaseq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/rnaseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/rnaseq/output).\n\nThis pipeline quantifies RNA-sequenced reads relative to genes/transcripts in the genome and normalizes the resulting data. It does not compare the samples statistically in order to assign significance in the form of FDR or P-values. 
For downstream analyses, the output files from this pipeline can be analysed directly in statistical environments like [R](https://www.r-project.org/), [Julia](https://julialang.org/) or via the [nf-core/differentialabundance](https://github.com/nf-core/differentialabundance/) pipeline.\n\n## Online videos\n\nA short talk about the history, current status and functionality on offer in this pipeline was given by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) on [8th February 2022](https://nf-co.re/events/2022/bytesize-32-nf-core-rnaseq) as part of the nf-core/bytesize series.\n\nYou can find numerous talks on the [nf-core events page](https://nf-co.re/events) from various topics including writing pipelines/modules in Nextflow DSL2, using nf-core tooling, running nf-core pipelines as well as more generic content like contributing to Github. Please check them out!\n\n## Credits\n\nThese scripts were originally written for use at the [National Genomics Infrastructure](https://ngisweden.scilifelab.se), part of [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden, by Phil Ewels ([@ewels](https://github.com/ewels)) and Rickard Hammar\u00e9n ([@Hammarn](https://github.com/Hammarn)).\n\nThe pipeline was re-written in Nextflow DSL2 by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/), and is currently maintained by Jonathan Manning ([@pinin4fjords](https://github.com/pinin4fjords)) and the nf-core community.\n\nThe pipeline workflow diagram was initially designed by Sarah Guinchard ([@G-Sarah](https://github.com/G-Sarah)) and James Fellows Yates ([@jfy133](https://github.com/jfy133)), further modifications were made by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) and Maxime Garcia ([@maxulysse](https://github.com/maxulysse)).\n\nMany thanks to others who have helped out along the way too, including (but not limited to):\n\n- [Alex Peltzer](https://github.com/apeltzer)\n- [Colin 
Davenport](https://github.com/colindaven)\n- [Denis Moreno](https://github.com/Galithil)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Gregor Sturm](https://github.com/grst)\n- [Jacki Buros Novik](https://github.com/jburos)\n- [Lorena Pantano](https://github.com/lpantano)\n- [Matthias Zepper](https://github.com/MatthiasZepper)\n- [Maxime Garcia](https://github.com/maxulysse)\n- [Olga Botvinnik](https://github.com/olgabot)\n- [@orzechoj](https://github.com/orzechoj)\n- [Paolo Di Tommaso](https://github.com/pditommaso)\n- [Rob Syme](https://github.com/robsyme)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#rnaseq` channel](https://nfcore.slack.com/channels/rnaseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/rnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.1400710](https://doi.org/10.5281/zenodo.1400710)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" }, - { - "@id": "docs/images/nf-core-rnaseq_metro_map_grey.png" - }, { "@id": "assets/" }, - { - "@id": "bin/" - }, { "@id": "conf/" }, @@ -49,9 +43,6 @@ { "@id": "modules/" }, - { - "@id": "modules/local/" - }, { "@id": "modules/nf-core/" }, @@ -108,7 +99,7 @@ }, "mentions": [ { - "@id": "#fe70c328-5491-4997-ba53-2934115f43f8" + "@id": "#0ad48f19-9581-4e1b-b10f-638ab0a48482" } ], "name": "nf-core/rnaseq" @@ -137,84 +128,12 @@ ], "creator": [ { - "@id": "#31933289+amayer21@users.noreply.github.com" - }, - { - "@id": "https://orcid.org/0000-0002-3859-3249" - }, - { - "@id": "https://orcid.org/0000-0002-1968-2270" - }, - { - "@id": "https://orcid.org/0000-0002-6503-2180" - }, - { - "@id": "https://orcid.org/0000-0003-4412-7970" - }, - { - "@id": "https://orcid.org/0000-0001-9584-7842" - }, - { - "@id": "https://orcid.org/0000-0002-8721-2350" - }, - { - "@id": "https://orcid.org/0000-0002-8824-1946" - }, - { - "@id": "https://orcid.org/0000-0003-3966-8481" - }, - { - "@id": "https://orcid.org/0000-0002-2798-9870" - }, - { - "@id": "#42973691+d4straub@users.noreply.github.com" - }, - { - "@id": "https://orcid.org/0000-0001-9017-591X" - }, - { - "@id": "#zhenfeng.liu1@gmail.com" - }, - { - "@id": "#omeally@gmail.com" - }, - { - "@id": "https://orcid.org/0000-0002-5748-9594" - }, - { - "@id": "https://orcid.org/0000-0003-0603-7907" - }, - { - "@id": "#sven.fillinger@qbic.uni-tuebingen.de" - }, - { - "@id": "https://orcid.org/0009-0009-7515-5000" - }, - { - "@id": "#zhoupenggeni@gmail.com" - }, - { - "@id": "#jonathan.manning@seqera.io" - }, - { - "@id": "#drpatelh@users.noreply.github.com" - }, - { - "@id": "https://orcid.org/0000-0003-3220-0253" - }, - { - "@id": "#chuan.wang@scilifelab.se" - }, - { - "@id": "#phil.ewels@seqera.io" + "@id": "#phil.ewels@scilifelab.se" } ], "dateCreated": "", - "dateModified": "2025-09-18T11:15:27Z", + "dateModified": 
"2025-11-26T10:00:00Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "image": { - "@id": "docs/images/nf-core-rnaseq_metro_map_grey.png" - }, "keywords": [ "nf-core", "nextflow", @@ -224,29 +143,6 @@ "license": [ "MIT" ], - "maintainer": [ - { - "@id": "https://orcid.org/0000-0002-6503-2180" - }, - { - "@id": "https://orcid.org/0000-0003-4412-7970" - }, - { - "@id": "https://orcid.org/0000-0002-8721-2350" - }, - { - "@id": "https://orcid.org/0000-0001-9017-591X" - }, - { - "@id": "#jonathan.manning@seqera.io" - }, - { - "@id": "#drpatelh@users.noreply.github.com" - }, - { - "@id": "#phil.ewels@seqera.io" - } - ], "name": [ "nf-core/rnaseq" ], @@ -258,10 +154,10 @@ }, "url": [ "https://github.com/nf-core/rnaseq", - "https://nf-co.re/rnaseq/3.21.0/" + "https://nf-co.re/rnaseq/3.22.0/" ], "version": [ - "3.21.0" + "3.22.0" ] }, { @@ -274,22 +170,14 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=24.10.5" + "version": "!>=25.04.0" }, { - "@id": "docs/images/nf-core-rnaseq_metro_map_grey.png", - "@type": [ - "File", - "ImageObject" - ], - "name": "Workflow diagram" - }, - { - "@id": "#fe70c328-5491-4997-ba53-2934115f43f8", + "@id": "#0ad48f19-9581-4e1b-b10f-638ab0a48482", "@type": "TestSuite", "instance": [ { - "@id": "#39e8f629-2b74-47e7-9ca2-8653da3910ab" + "@id": "#d85e8f82-5b12-4ac5-842a-1621342f3e0b" } ], "mainEntity": { @@ -298,7 +186,7 @@ "name": "Test suite for nf-core/rnaseq" }, { - "@id": "#39e8f629-2b74-47e7-9ca2-8653da3910ab", + "@id": "#d85e8f82-5b12-4ac5-842a-1621342f3e0b", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/rnaseq", "resource": "repos/nf-core/rnaseq/actions/workflows/nf-test.yml", @@ -320,11 +208,6 @@ "@type": "Dataset", "description": "Additional files" }, - { - "@id": "bin/", - "@type": "Dataset", - "description": "Scripts that must be callable from a pipeline process" - }, { "@id": "conf/", "@type": "Dataset", @@ -345,11 +228,6 @@ "@type": 
"Dataset", "description": "Modules used by the pipeline" }, - { - "@id": "modules/local/", - "@type": "Dataset", - "description": "Pipeline-specific modules" - }, { "@id": "modules/nf-core/", "@type": "Dataset", @@ -437,147 +315,9 @@ "url": "https://nf-co.re/" }, { - "@id": "#31933289+amayer21@users.noreply.github.com", - "@type": "Person", - "email": "31933289+amayer21@users.noreply.github.com", - "name": "Alice Mayer" - }, - { - "@id": "https://orcid.org/0000-0002-3859-3249", - "@type": "Person", - "email": "lorena.pantano@gmail.com", - "name": "Lorena Pantano" - }, - { - "@id": "https://orcid.org/0000-0002-1968-2270", - "@type": "Person", - "email": "anandasanil@gmail.com", - "name": "Anandashankar Anil" - }, - { - "@id": "https://orcid.org/0000-0002-6503-2180", - "@type": "Person", - "email": "apeltzer@users.noreply.github.com", - "name": "Alexander Peltzer" - }, - { - "@id": "https://orcid.org/0000-0003-4412-7970", - "@type": "Person", - "email": "olga.botvinnik@gmail.com", - "name": "Olga Botvinnik" - }, - { - "@id": "https://orcid.org/0000-0001-9584-7842", - "@type": "Person", - "email": "mail@gregor-sturm.de", - "name": "Gregor Sturm" - }, - { - "@id": "https://orcid.org/0000-0002-8721-2350", - "@type": "Person", - "email": "rob.syme@gmail.com", - "name": "Robert Syme" - }, - { - "@id": "https://orcid.org/0000-0002-8824-1946", - "@type": "Person", - "email": "gisela.gabernet@gmail.com", - "name": "Gisela Gabernet Garriga" - }, - { - "@id": "https://orcid.org/0000-0003-3966-8481", - "@type": "Person", - "email": "pcantalupo@gmail.com", - "name": "Paul Cantalupo" - }, - { - "@id": "https://orcid.org/0000-0002-2798-9870", - "@type": "Person", - "email": "silvia.morini01@gmail.com", - "name": "Silvia Morini" - }, - { - "@id": "#42973691+d4straub@users.noreply.github.com", - "@type": "Person", - "email": "42973691+d4straub@users.noreply.github.com", - "name": "Daniel Straub" - }, - { - "@id": "https://orcid.org/0000-0001-9017-591X", - "@type": "Person", - 
"email": "rickard.hammaren@scilifelab.se", - "name": "Rickard Hammar\u00e9n" - }, - { - "@id": "#zhenfeng.liu1@gmail.com", - "@type": "Person", - "email": "zhenfeng.liu1@gmail.com", - "name": "Zhenfeng Liu" - }, - { - "@id": "#omeally@gmail.com", - "@type": "Person", - "email": "omeally@gmail.com", - "name": "Denis OMeally" - }, - { - "@id": "https://orcid.org/0000-0002-5748-9594", - "@type": "Person", - "email": "pranathi93.vemuri@gmail.com", - "name": "Pranathi Vemuri" - }, - { - "@id": "https://orcid.org/0000-0003-0603-7907", - "@type": "Person", - "email": "sabrina.krakau@qbic.uni-tuebingen.de", - "name": "Sabrina Krakau" - }, - { - "@id": "#sven.fillinger@qbic.uni-tuebingen.de", - "@type": "Person", - "email": "sven.fillinger@qbic.uni-tuebingen.de", - "name": "Sven F" - }, - { - "@id": "https://orcid.org/0009-0009-7515-5000", - "@type": "Person", - "email": "sofia.haglund@scilifelab.se", - "name": "Sofia Haglund" - }, - { - "@id": "#zhoupenggeni@gmail.com", - "@type": "Person", - "email": "zhoupenggeni@gmail.com", - "name": "Peng Zhou" - }, - { - "@id": "#jonathan.manning@seqera.io", - "@type": "Person", - "email": "jonathan.manning@seqera.io", - "name": "Jonathan Manning" - }, - { - "@id": "#drpatelh@users.noreply.github.com", - "@type": "Person", - "email": "drpatelh@users.noreply.github.com", - "name": "Harshil Patel" - }, - { - "@id": "https://orcid.org/0000-0003-3220-0253", - "@type": "Person", - "email": "paolo.ditommaso@gmail.com", - "name": "Paolo Di Tommaso" - }, - { - "@id": "#chuan.wang@scilifelab.se", - "@type": "Person", - "email": "chuan.wang@scilifelab.se", - "name": "Chuan Wang" - }, - { - "@id": "#phil.ewels@seqera.io", + "@id": "#phil.ewels@scilifelab.se", "@type": "Person", - "email": "phil.ewels@seqera.io", + "email": "phil.ewels@scilifelab.se", "name": "Phil Ewels" } ] diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf index e5dd439ea..a94cac101 100644 --- 
a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf @@ -10,6 +10,8 @@ include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' include { imNotification } from '../../nf-core/utils_nfcore_pipeline' @@ -32,10 +34,14 @@ workflow PIPELINE_INITIALISATION { monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message main: - ch_versions = Channel.empty() + ch_versions = channel.empty() // // Print version and exit if required and dump pipeline parameters to JSON file @@ -50,10 +56,35 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // + before_text = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m nf-core/rnaseq ${workflow.manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + after_text = """${workflow.manifest.doi ? 
"\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { doi -> " https://doi.org/${doi.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? "\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/nf-core/rnaseq/blob/master/CITATIONS.md +""" + command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFSCHEMA_PLUGIN ( workflow, validate_params, - null + null, + help, + help_full, + show_hidden, + before_text, + after_text, + command ) // @@ -232,6 +263,23 @@ def validateInputParameters() { } if (params.transcript_fasta) { + // Only error if additional_fasta is provided AND we need to build a pseudo-aligner index + // (i.e., no pre-built salmon/kallisto index provided). If the user provides a pre-built + // index that already contains the spike-ins, the combination is valid. + if (params.additional_fasta) { + def needs_to_build_index = false + if (!params.skip_pseudo_alignment && params.pseudo_aligner) { + // Check if the relevant index for the selected pseudo-aligner is missing + if (params.pseudo_aligner == 'salmon' && !params.salmon_index) { + needs_to_build_index = true + } else if (params.pseudo_aligner == 'kallisto' && !params.kallisto_index) { + needs_to_build_index = true + } + } + if (needs_to_build_index) { + transcriptFastaAdditionalFastaError() + } + } transcriptsFastaWarn() } @@ -465,6 +513,28 @@ def transcriptsFastaWarn() { "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } +// +// Print an error if using both '--transcript_fasta' and '--additional_fasta' without a pre-built index +// +def transcriptFastaAdditionalFastaError() { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Both '--transcript_fasta' and '--additional_fasta' have been provided,\n" + + " but no pre-built pseudo-aligner index 
(--salmon_index/--kallisto_index).\n\n" + + " The pipeline cannot append additional sequences (e.g. ERCC spike-ins) to a\n" + + " user-provided transcriptome FASTA file. This would cause quantification to\n" + + " fail because the built index would not contain the additional sequences.\n\n" + + " Please either:\n" + + " - Remove '--transcript_fasta' and let the pipeline generate the\n" + + " transcriptome from the genome FASTA and GTF (recommended), or\n" + + " - Provide a pre-built index (--salmon_index/--kallisto_index) that\n" + + " already contains the additional sequences, or\n" + + " - Remove '--additional_fasta' if you do not need spike-in sequences.\n\n" + + " Please see:\n" + + " https://github.com/nf-core/rnaseq/issues/1450\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) +} + // // Print a warning if --skip_alignment has been provided // diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.pipeline_initialisation.workflow.nf.test b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.pipeline_initialisation.workflow.nf.test index 87e498887..d304eccb5 100644 --- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.pipeline_initialisation.workflow.nf.test +++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.pipeline_initialisation.workflow.nf.test @@ -33,6 +33,10 @@ nextflow_workflow { input[2] = true // monochrome_logs (boolean) input[3] = args // args (array) input[4] = file("$outputDir") // outdir (string) + input[5] = null // input (string) + input[6] = false // help (boolean) + input[7] = false // help_full (boolean) + input[8] = false // show_hidden (boolean) """ } } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index bfd258760..2f30e9a46 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -98,7 +98,7 @@ def 
workflowVersionToYAML() { // Get channel of software versions used in pipeline in YAML format // def softwareVersionsToYAML(ch_versions) { - return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(channel.of(workflowVersionToYAML())) } // diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf index 4994303ea..ee4738c8d 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -4,6 +4,7 @@ include { paramsSummaryLog } from 'plugin/nf-schema' include { validateParameters } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' workflow UTILS_NFSCHEMA_PLUGIN { @@ -15,29 +16,56 @@ workflow UTILS_NFSCHEMA_PLUGIN { // when this input is empty it will automatically use the configured schema or // "${projectDir}/nextflow_schema.json" as default. This input should not be empty // for meta pipelines + help // boolean: show help message + help_full // boolean: show full help message + show_hidden // boolean: show hidden parameters in help message + before_text // string: text to show before the help message and parameters summary + after_text // string: text to show after the help message and parameters summary + command // string: an example command of the pipeline main: + if(help || help_full) { + help_options = [ + beforeText: before_text, + afterText: after_text, + command: command, + showHidden: show_hidden, + fullHelp: help_full, + ] + if(parameters_schema) { + help_options << [parametersSchema: parameters_schema] + } + log.info paramsHelp( + help_options, + params.help instanceof String ? params.help : "", + ) + exit 0 + } + // // Print parameter summary to stdout. 
This will display the parameters // that differ from the default given in the JSON schema // + + summary_options = [:] if(parameters_schema) { - log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) - } else { - log.info paramsSummaryLog(input_workflow) + summary_options << [parametersSchema: parameters_schema] } + log.info before_text + log.info paramsSummaryLog(summary_options, input_workflow) + log.info after_text // // Validate the parameters using nextflow_schema.json or the schema // given via the validation.parametersSchema configuration option // if(validate_params) { + validateOptions = [:] if(parameters_schema) { - validateParameters(parameters_schema:parameters_schema) - } else { - validateParameters() + validateOptions << [parametersSchema: parameters_schema] } + validateParameters(validateOptions) } emit: diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test index 0ca34636f..5202f1677 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -21,6 +21,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -47,6 +53,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -73,6 +85,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -99,6 +117,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = 
"${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -110,4 +134,36 @@ nextflow_workflow { ) } } + + test("Should create a help message") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = true + input[4] = false + input[5] = false + input[6] = "Before" + input[7] = "After" + input[8] = "nextflow run test/test" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } } diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 09ef842ae..8d8c73718 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,8 +1,8 @@ plugins { - id "nf-schema@2.4.2" + id "nf-schema@2.5.1" } validation { parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" monochromeLogs = true -} \ No newline at end of file +} diff --git a/tests/.nftignore b/tests/.nftignore index 2013f8551..8a0e07fc6 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -2,12 +2,22 @@ .DS_Store bbsplit/*.stats.txt fastqc/*/*.{html,zip} +fastqc/*_fastqc.{html,zip} fq_lint/*/*.fq_lint.txt hisat2/log/*.hisat2.summary.log kallisto/*/abundance.{h5,tsv} kallisto/*/kallisto_quant.log kallisto/*/run_info.json kallisto/kallisto.* +multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt +multiqc/multiqc_data/multiqc.parquet +multiqc/multiqc_data/multiqc.log +multiqc/multiqc_data/multiqc_data.json +multiqc/multiqc_data/multiqc_sources.txt +multiqc/multiqc_data/multiqc_software_versions.txt 
+multiqc/multiqc_data/llms-full.txt +multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} +multiqc/multiqc_report.html pipeline_info/*.{html,json,txt,yml} sortmerna/*.sortmerna.log star_rsem/*.stat/*.{cnt,model,theta} diff --git a/tests/bam_input.nf.test.snap b/tests/bam_input.nf.test.snap index 17dfec744..388233d7d 100644 --- a/tests/bam_input.nf.test.snap +++ b/tests/bam_input.nf.test.snap @@ -1,7 +1,7 @@ { "BAM input for Salmon": { "content": [ - 139, + 144, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -79,6 +79,9 @@ "SAMTOOLS_INDEX": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, @@ -958,7 +961,7 @@ }, "BAM input for RSEM": { "content": [ - 138, + 143, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -1041,6 +1044,9 @@ "SAMTOOLS_INDEX": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 8571c3100..0798e8ef4 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -102,7 +102,7 @@ }, "Params: default": { "content": [ - 209, + 214, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -199,6 +199,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/tests/featurecounts_group_type.nf.test.snap b/tests/featurecounts_group_type.nf.test.snap index 4b07d4b71..5f34e3174 100644 --- a/tests/featurecounts_group_type.nf.test.snap +++ b/tests/featurecounts_group_type.nf.test.snap @@ -102,7 +102,7 @@ }, "Params: --featurecounts_group_type false": { "content": [ - 199, + 204, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -196,6 +196,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/tests/hisat2.nf.test.snap b/tests/hisat2.nf.test.snap index cf66b6527..032160724 100644 --- 
a/tests/hisat2.nf.test.snap +++ b/tests/hisat2.nf.test.snap @@ -103,7 +103,7 @@ }, "Params: --aligner hisat2": { "content": [ - 200, + 205, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -203,6 +203,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/tests/kallisto.nf.test b/tests/kallisto.nf.test index a67b2d314..8f974e1ab 100644 --- a/tests/kallisto.nf.test +++ b/tests/kallisto.nf.test @@ -12,6 +12,11 @@ nextflow_pipeline { pseudo_aligner = 'kallisto' skip_qc = true skip_alignment = true + // Disable spike-ins since we don't have a kallisto_index with spike-ins. + // Must also disable transcript_fasta because the test profile's transcriptome + // was generated with spike-ins - we need the pipeline to regenerate it. + additional_fasta = null + transcript_fasta = null } } @@ -46,6 +51,11 @@ nextflow_pipeline { pseudo_aligner = 'kallisto' skip_qc = true skip_alignment = true + // Disable spike-ins since we don't have a kallisto_index with spike-ins. + // Must also disable transcript_fasta because the test profile's transcriptome + // was generated with spike-ins - we need the pipeline to regenerate it. 
+ additional_fasta = null + transcript_fasta = null } } diff --git a/tests/kallisto.nf.test.snap b/tests/kallisto.nf.test.snap index 6fbdcc695..e214e61b7 100644 --- a/tests/kallisto.nf.test.snap +++ b/tests/kallisto.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --pseudo_aligner kallisto --skip_qc --skip_alignment": { "content": [ - 48, + 47, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -9,9 +9,6 @@ "CAT_FASTQ": { "cat": 9.5 }, - "CUSTOM_CATADDITIONALFASTA": { - "python": "3.12.2" - }, "CUSTOM_GETCHROMSIZES": { "getchromsizes": 1.21 }, @@ -30,9 +27,6 @@ "GTF_FILTER": { "python": "3.9.5" }, - "GUNZIP_ADDITIONAL_FASTA": { - "gunzip": 1.13 - }, "GUNZIP_GTF": { "gunzip": 1.13 }, @@ -42,6 +36,10 @@ "KALLISTO_QUANT": { "kallisto": "0.51.1" }, + "MAKE_TRANSCRIPTS_FASTA": { + "rsem": "1.3.1", + "star": "2.7.10a" + }, "SALMON_QUANT": { "salmon": "1.10.3" }, @@ -70,10 +68,6 @@ "bbsplit/RAP1_UNINDUCED_REP2.stats.txt", "bbsplit/WT_REP1.stats.txt", "bbsplit/WT_REP2.stats.txt", - "custom", - "custom/out", - "custom/out/genome_gfp.fasta", - "custom/out/genome_gfp.gtf", "fastqc", "fastqc/trim", "fastqc/trim/RAP1_IAA_30M_REP1_trimmed_1_val_1_fastqc.html", @@ -248,9 +242,7 @@ "trimgalore/WT_REP2_trimmed_2.fastq.gz_trimming_report.txt" ], [ - "genome_gfp.fasta:md5,e23e302af63736a199985a169fdac055", - "genome_gfp.gtf:md5,c98b12c302f15731bfc36bcf297cfe28", - "tx2gene.tsv:md5,0e2418a69d2eba45097ebffc2f700bfe", + "tx2gene.tsv:md5,1be389a28cc26d94b19ea918959ac72e", "cutadapt_filtered_reads_plot.txt:md5,6fa381627f7c1f664f3d4b2cb79cce90", "cutadapt_trimmed_sequences_plot_3_Counts.txt:md5,13dfa866fd91dbb072689efe9aa83b1f", "cutadapt_trimmed_sequences_plot_3_Obs_Exp.txt:md5,07145dd8dd3db654859b18eb0389046c", @@ -277,7 +269,7 @@ }, "Params: --pseudo_aligner kallisto --skip_qc --skip_alignment - stub": { "content": [ - 22, + 21, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -285,9 +277,6 @@ "CAT_FASTQ": { "cat": 9.5 }, - "CUSTOM_CATADDITIONALFASTA": { - "python": null - }, "CUSTOM_GETCHROMSIZES": { 
"getchromsizes": 1.21 }, @@ -300,15 +289,16 @@ "GTF_FILTER": { "python": "3.9.5" }, - "GUNZIP_ADDITIONAL_FASTA": { - "gunzip": 1.13 - }, "GUNZIP_GTF": { "gunzip": 1.13 }, "KALLISTO_INDEX": { "kallisto": "0.51.1" }, + "MAKE_TRANSCRIPTS_FASTA": { + "rsem": "1.3.1", + "star": "2.7.10a" + }, "TRIMGALORE": { "cutadapt": 4.9, "pigz": 2.8, @@ -319,10 +309,6 @@ } }, [ - "custom", - "custom/out", - "custom/out/genome_transcriptome.fasta", - "custom/out/genome_transcriptome.gtf", "fastqc", "fastqc/trim", "fq_lint", @@ -349,8 +335,6 @@ "trimgalore/WT_REP2_trimmed_2.fastq.gz_trimming_report.txt" ], [ - "genome_transcriptome.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", - "genome_transcriptome.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "meta": { diff --git a/tests/min_mapped_reads.nf.test.snap b/tests/min_mapped_reads.nf.test.snap index 72ced7061..014c2f424 100644 --- a/tests/min_mapped_reads.nf.test.snap +++ b/tests/min_mapped_reads.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --min_mapped_reads 90": { "content": [ - 163, + 166, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -98,6 +98,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/tests/remove_ribo_rna.nf.test.snap b/tests/remove_ribo_rna.nf.test.snap index e2789c952..f6de3f886 100644 --- a/tests/remove_ribo_rna.nf.test.snap +++ b/tests/remove_ribo_rna.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --remove_ribo_rna": { "content": [ - 220, + 225, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -98,6 +98,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/tests/sentieon_default.nf.test.snap b/tests/sentieon_default.nf.test.snap index 6c9d283f5..69e433e8a 100644 --- a/tests/sentieon_default.nf.test.snap +++ b/tests/sentieon_default.nf.test.snap @@ -102,7 +102,7 @@ }, "Params: default --use_sentieon_star": { "content": [ - 209, + 214, { "BBMAP_BBSPLIT": { 
"bbmap": 39.18 @@ -199,6 +199,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/tests/skip_trimming.nf.test.snap b/tests/skip_trimming.nf.test.snap index a95a80efe..e4e43d55b 100644 --- a/tests/skip_trimming.nf.test.snap +++ b/tests/skip_trimming.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --skip_trimming": { "content": [ - 199, + 204, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -98,6 +98,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/tests/star_rsem.nf.test.snap b/tests/star_rsem.nf.test.snap index f0f78f74f..68292fc9e 100644 --- a/tests/star_rsem.nf.test.snap +++ b/tests/star_rsem.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --aligner star_rsem": { "content": [ - 207, + 212, { "BBMAP_BBSPLIT": { "bbmap": 39.18 @@ -109,6 +109,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/tests/umi.nf.test.snap b/tests/umi.nf.test.snap index fc7524acf..8ddd1d27c 100644 --- a/tests/umi.nf.test.snap +++ b/tests/umi.nf.test.snap @@ -1,7 +1,7 @@ { "--umi_dedup_tool 'umitools'": { "content": [ - 261, + 266, { "BEDTOOLS_GENOMECOV_FW": { "bedtools": "2.31.1" @@ -92,6 +92,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, @@ -1507,7 +1510,7 @@ }, "Params: --aligner hisat2 --umi_dedup_tool 'umicollapse'": { "content": [ - 194, + 199, { "BEDTOOLS_GENOMECOV_FW": { "bedtools": "2.31.1" @@ -1601,6 +1604,9 @@ "SAMTOOLS_SORT": { "samtools": 1.21 }, + "SAMTOOLS_SORT_QUALIMAP": { + "samtools": 1.21 + }, "SAMTOOLS_STATS": { "samtools": 1.21 }, diff --git a/workflows/rnaseq/assets/multiqc/multiqc_config.yml b/workflows/rnaseq/assets/multiqc/multiqc_config.yml index f34c955e6..2203a0e35 100644 --- 
a/workflows/rnaseq/assets/multiqc/multiqc_config.yml +++ b/workflows/rnaseq/assets/multiqc/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/rnaseq analysis pipeline. For information about how - to interpret these results, please see the documentation. report_section_order: # Important checks and failures diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index d3a3ded0c..b61312ad3 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -1,6 +1,6 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS + IMPORT MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ @@ -47,6 +47,7 @@ include { MULTIQC } from '../../modules/nf-core/multiqc' include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_FW } from '../../modules/nf-core/bedtools/genomecov' include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_REV } from '../../modules/nf-core/bedtools/genomecov' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_QUALIMAP } from '../../modules/nf-core/samtools/sort' // // SUBWORKFLOW: Consisting entirely of nf-core/modules @@ -499,6 +500,7 @@ workflow RNASEQ { // // MODULE: Genome-wide coverage with BEDTools + // Note: Strand parameters are conditional on library strandedness (see nextflow.config) // if (!params.skip_bigwig) { @@ -539,8 +541,15 @@ workflow RNASEQ { // if (!params.skip_qc) { if (!params.skip_qualimap) { - QUALIMAP_RNASEQ ( + // Sort BAM by name for qualimap (performance optimization) + SAMTOOLS_SORT_QUALIMAP ( ch_genome_bam, + ch_fasta.map { [ [:], it ] } + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_QUALIMAP.out.versions.first()) + + QUALIMAP_RNASEQ ( + SAMTOOLS_SORT_QUALIMAP.out.bam, ch_gtf.map { [ [:], it ] } ) ch_multiqc_files = 
ch_multiqc_files.mix(QUALIMAP_RNASEQ.out.results.collect{it[1]}) diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index 7bd96fc31..d6f674c7a 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -36,16 +36,7 @@ if (params.aligner == 'star_salmon') { ] } - withName: '.*:QUANTIFY_STAR_SALMON:CUSTOM_TX2GENE' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:QUANTIFY_STAR_SALMON:TXIMETA_TXIMPORT' { - ext.prefix = { "${quant_type}.merged" } + withName: '.*:QUANTIFY_STAR_SALMON:(CUSTOM_TX2GENE|TXIMETA_TXIMPORT)' { publishDir = [ path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode, @@ -61,17 +52,11 @@ if (params.aligner == 'star_salmon') { saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.log') ? null : filename } ] } - withName: '.*:QUANTIFY_STAR_SALMON:SE_GENE_UNIFIED' { - ext.prefix = { "${params.pseudo_aligner}.merged.gene" } - } - withName: '.*:QUANTIFY_STAR_SALMON:SE_TRANSCRIPT_UNIFIED' { - ext.prefix = { "${params.pseudo_aligner}.merged.transcript" } - } } if (params.with_umi) { process { - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):SAMTOOLS_SORT' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_STAR:SAMTOOLS_SORT' { ext.args = '-n' ext.prefix = { "${meta.id}.umi_dedup.transcriptome" } publishDir = [ @@ -82,7 +67,7 @@ if (params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):UMITOOLS_PREPAREFORRSEM' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_STAR:UMITOOLS_PREPAREFORRSEM' { ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" } publishDir = [ [ @@ -101,19 +86,13 @@ if (params.aligner == 'star_salmon') { withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { 
"${meta.id}.transcriptome.sorted" } - publishDir = [ - path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, - mode: params.publish_dir_mode, - pattern: '*.bam', - saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } - ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_STAR:BAM_SORT_STATS_SAMTOOLS:(SAMTOOLS_SORT|SAMTOOLS_INDEX)' { publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, - pattern: '*.bai', + pattern: '*.{bam,bai}', saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } ] } @@ -212,31 +191,32 @@ process { withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } - publishDir = [ - path: { ( ['star_salmon','hisat2'].contains(params.aligner) && - ( params.save_align_intermeds || ( !params.with_umi && params.skip_markduplicates ) ) - ) || params.save_align_intermeds || params.skip_markduplicates ? "${params.outdir}/${params.aligner}" : params.outdir }, - mode: params.publish_dir_mode, - pattern: "*.bam", - saveAs: { ( ['star_salmon','hisat2'].contains(params.aligner) && - ( params.save_align_intermeds || ( !params.with_umi && params.skip_markduplicates ) ) - ) || params.save_align_intermeds || params.skip_markduplicates ? it : null } - ] } withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { ext.args = { params.bam_csi_index ? 
'-c' : '' } + } + + withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:(SAMTOOLS_SORT|SAMTOOLS_INDEX)' { publishDir = [ - path: { ( ['star_salmon','hisat2'].contains(params.aligner) && - ( params.save_align_intermeds || ( !params.with_umi && params.skip_markduplicates ) ) - ) || params.save_align_intermeds || params.skip_markduplicates ? "${params.outdir}/${params.aligner}" : params.outdir }, + path: { params.save_align_intermeds || params.skip_markduplicates ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, - pattern: "*.{bai,csi}", - saveAs: { ( ['star_salmon','hisat2'].contains(params.aligner) && - ( params.save_align_intermeds || ( !params.with_umi && params.skip_markduplicates ) ) - ) || params.save_align_intermeds || params.skip_markduplicates ? it : null } + pattern: "*.{bam,bai,csi}", + saveAs: { params.save_align_intermeds || params.skip_markduplicates ? it : null } ] } + + withName: '.*:(QUANTIFY_STAR_SALMON|QUANTIFY_PSEUDO_ALIGNMENT):TXIMETA_TXIMPORT' { + ext.prefix = { "${quant_type}.merged" } + } + + withName: '.*:(QUANTIFY_STAR_SALMON|QUANTIFY_PSEUDO_ALIGNMENT):SE_GENE_UNIFIED' { + ext.prefix = { "${params.pseudo_aligner}.merged.gene" } + } + + withName: '.*:(QUANTIFY_STAR_SALMON|QUANTIFY_PSEUDO_ALIGNMENT):SE_TRANSCRIPT_UNIFIED' { + ext.prefix = { "${params.pseudo_aligner}.merged.transcript" } + } } if (params.with_umi && ['star_salmon','hisat2'].contains(params.aligner)) { @@ -335,15 +315,15 @@ if (!params.skip_bigwig) { process { withName: 'BEDTOOLS_GENOMECOV_FW' { ext.prefix = { meta.strandedness == 'reverse' ? meta.id + '.reverse' : meta.id + '.forward' } - ext.args = '-split -du -strand + -bg' - publishDir = [ - enabled: false - ] + ext.args = { meta.strandedness == 'reverse' ? '-split -du -strand - -bg' : '-split -du -strand + -bg' } } withName: 'BEDTOOLS_GENOMECOV_REV' { ext.prefix = { meta.strandedness == 'reverse' ? 
meta.id + '.forward' : meta.id + '.reverse' } - ext.args = '-split -du -strand - -bg' + ext.args = { meta.strandedness == 'reverse' ? '-split -du -strand + -bg' : '-split -du -strand - -bg' } + } + + withName: '(BEDTOOLS_GENOMECOV_(FW|REV)|.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_(FORWARD|REVERSE):UCSC_BEDCLIP)' { publishDir = [ enabled: false ] @@ -351,29 +331,21 @@ if (!params.skip_bigwig) { withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD:UCSC_BEDCLIP' { ext.prefix = { "${meta.id}.clip.forward" } - publishDir = [ - enabled: false - ] } withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD:UCSC_BEDGRAPHTOBIGWIG' { ext.prefix = { "${meta.id}.forward" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/bigwig" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE:UCSC_BEDCLIP' { ext.prefix = { "${meta.id}.clip.reverse" } - publishDir = [ - enabled: false - ] } withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE:UCSC_BEDGRAPHTOBIGWIG' { ext.prefix = { "${meta.id}.reverse" } + } + + withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_(FORWARD|REVERSE):UCSC_BEDGRAPHTOBIGWIG' { publishDir = [ path: { "${params.outdir}/${params.aligner}/bigwig" }, mode: params.publish_dir_mode, @@ -387,44 +359,21 @@ if (!params.skip_bigwig) { // DESeq2 QC options // -if (params.aligner == 'star_salmon') { - if (!params.skip_qc & !params.skip_deseq2_qc) { - process { - withName: 'DESEQ2_QC_STAR_SALMON' { - ext.args = { [ - "--id_col 1", - "--sample_suffix ''", - "--count_col 3", - params.deseq2_vst ? 
'--vst TRUE' : '' - ].join(' ').trim() } - ext.args2 = 'star_salmon' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/deseq2_qc" }, - mode: params.publish_dir_mode, - pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" - ] - } - } - } -} - -if (params.aligner == 'star_rsem') { - if (!params.skip_qc & !params.skip_deseq2_qc) { - process { - withName: 'DESEQ2_QC_RSEM' { - ext.args = { [ - "--id_col 1", - "--sample_suffix ''", - "--count_col 3", - params.deseq2_vst ? '--vst TRUE' : '' - ].join(' ').trim() } - ext.args2 = 'star_rsem' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/deseq2_qc" }, - mode: params.publish_dir_mode, - pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" - ] - } +if (params.aligner in ['star_salmon', 'star_rsem'] && !params.skip_qc && !params.skip_deseq2_qc) { + process { + withName: 'DESEQ2_QC_(STAR_SALMON|RSEM)' { + ext.args = { [ + "--id_col 1", + "--sample_suffix ''", + "--count_col 3", + params.deseq2_vst ? '--vst TRUE' : '' + ].join(' ').trim() } + ext.args2 = { params.aligner } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/deseq2_qc" }, + mode: params.publish_dir_mode, + pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" + ] } } } @@ -435,16 +384,7 @@ if (params.aligner == 'star_rsem') { if (!params.skip_pseudo_alignment && params.pseudo_aligner) { process { - withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:CUSTOM_TX2GENE' { - publishDir = [ - path: { "${params.outdir}/${params.pseudo_aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:TXIMETA_TXIMPORT' { - ext.prefix = { "${quant_type}.merged" } + withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:(CUSTOM_TX2GENE|TXIMETA_TXIMPORT)' { publishDir = [ path: { "${params.outdir}/${params.pseudo_aligner}" }, mode: params.publish_dir_mode, @@ -460,15 +400,9 @@ if (!params.skip_pseudo_alignment && params.pseudo_aligner) { saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.log') ? null : filename } ] } - withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:SE_GENE_UNIFIED' { - ext.prefix = { "${params.pseudo_aligner}.merged.gene" } - } - withName: '.*:QUANTIFY_PSEUDO_ALIGNMENT:SE_TRANSCRIPT_UNIFIED' { - ext.prefix = { "${params.pseudo_aligner}.merged.transcript" } - } } - if (!params.skip_qc & !params.skip_deseq2_qc) { + if (!params.skip_qc && !params.skip_deseq2_qc) { process { withName: 'DESEQ2_QC_PSEUDO' { ext.args = { [