Add bracken (#1423)

* feat: add template for Bracken

* chore: update version

* refactor: change command build

* refactor: rename report variable, change quotes

* docs: remove refactored input parameter

* fix: correctly assign arguments to options

* tests: set up single and paired end tests

* style: apply prettier

* chore: change data sources to official ones

* refactor: rename test workflows

* tests: use correct input to the new UNTAR module

* chore: update md5sums
This commit is contained in:
Moritz E. Beber 2022-03-22 16:40:21 +01:00 committed by GitHub
parent e080f4c8ac
commit 9c386c5dd8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 198 additions and 0 deletions

View file

@ -0,0 +1,41 @@
// Runs Bracken on a Kraken2 report to re-estimate taxonomic abundances.
//
// Inputs:
//   meta          - sample map; `id` is used for the tag and default prefix. The
//                   optional keys `threshold`, `taxonomic_level` and
//                   `read_length` set the -t, -l and -r options.
//   kraken_report - report file passed to `bracken -i`
//   database      - database path passed to `bracken -d`
// Outputs:
//   reports  - tuple of meta and the report named "<prefix>_<taxonomic_level>.tsv"
//   versions - versions.yml recording the Bracken version
process BRACKEN_BRACKEN {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::bracken=2.6.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bracken:2.6.2--py39hc16433a_0':
        'quay.io/biocontainers/bracken:2.6.2--py39hc16433a_0' }"

    input:
    tuple val(meta), path(kraken_report)
    path database

    output:
    tuple val(meta), path(bracken_report), emit: reports
    path "versions.yml"                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Explicit null check instead of `?:` because 0 is falsy in Groovy, so the
    // Elvis operator would silently turn a requested threshold of 0 into 10.
    def threshold = meta.threshold != null ? meta.threshold : 10
    def taxonomic_level = meta.taxonomic_level ?: 'S'
    def read_length = meta.read_length ?: 150
    // NOTE: when task.ext.args is set it replaces the whole meta-derived option
    // string below, while the report name still uses taxonomic_level.
    def args = task.ext.args ?: "-l ${taxonomic_level} -t ${threshold} -r ${read_length}"
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version is hard-coded here rather than queried from the tool.
    def bracken_version = '2.6.2'
    // Deliberately not declared with `def`: the output block refers to this name.
    bracken_report = "${prefix}_${taxonomic_level}.tsv"
    """
    bracken \\
        ${args} \\
        -d '${database}' \\
        -i '${kraken_report}' \\
        -o '${bracken_report}'

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bracken: ${bracken_version}
    END_VERSIONS
    """
}

View file

@ -0,0 +1,45 @@
# Module metadata for BRACKEN_BRACKEN: tool description plus its input and
# output channels.
name: bracken_bracken
description: Re-estimate taxonomic abundance of metagenomic samples analyzed by kraken.
keywords:
  - bracken
  - metagenomics
  - abundance
  - kraken2
tools:
  - bracken:
      description: Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample.
      homepage: https://ccb.jhu.edu/software/bracken/
      documentation: https://ccb.jhu.edu/software/bracken/index.shtml?t=manual
      tool_dev_url: https://github.com/jenniferlu717/Bracken
      doi: "10.7717/peerj-cs.104"
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
        Optional keys read by the module: threshold (default 10),
        taxonomic_level (default 'S') and read_length (default 150).
  - kraken_report:
      type: file
      description: TSV file with six columns coming from kraken2 output
      pattern: "*.{tsv}"
  # The database is staged as a directory, so it is typed accordingly.
  - database:
      type: directory
      description: Directory containing the kraken2/Bracken files for analysis
      pattern: "*"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - reports:
      type: file
      description: TSV output report of the re-estimated abundances
      pattern: "*.{tsv}"

authors:
  - "@Midnighter"

View file

@ -313,6 +313,10 @@ bowtie2/build:
- modules/bowtie2/build/**
- tests/modules/bowtie2/build_test/**
# Path globs associated with the bracken/bracken tag: the module sources and
# its tests. Presumably used to pick which tests to run when these paths
# change; confirm against the CI configuration.
bracken/bracken:
- modules/bracken/bracken/**
- tests/modules/bracken/bracken/**
bwa/aln:
- modules/bwa/aln/**
- tests/modules/bwa/aln/**

View file

@ -25,6 +25,9 @@ params {
kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2"
kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz"
// Combined kraken2/Bracken database, as a directory and as a tarball. The
// bracken tests unpack the tarball with UNTAR and pass the result to both
// KRAKEN2_KRAKEN2 and BRACKEN_BRACKEN.
kraken2_bracken = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2_bracken"
kraken2_bracken_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2_bracken.tar.gz"
ncbi_taxmap_zip = "${test_data_dir}/genomics/sarscov2/genome/db/maltextract/ncbi_taxmap.zip"
taxon_list_txt = "${test_data_dir}/genomics/sarscov2/genome/db/maltextract/taxon_list.txt"

View file

@ -0,0 +1,57 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { UNTAR } from '../../../../modules/untar/main.nf'
include { KRAKEN2_KRAKEN2 } from '../../../../modules/kraken2/kraken2/main.nf'
include { BRACKEN_BRACKEN } from '../../../../modules/bracken/bracken/main.nf'
// Single-end reads, no Bracken overrides in the meta map: the module falls
// back to its default options.
workflow test_bracken_bracken_single_end_default_args {

    sample_meta = [ id:'test', single_end:true ]
    fastq_files = [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
    reads       = [ sample_meta, fastq_files ]

    // Unpack the combined kraken2/Bracken database and keep only the
    // second element of each emitted tuple.
    db_archive = file(params.test_data['sarscov2']['genome']['kraken2_bracken_tar_gz'], checkIfExists: true)
    UNTAR ( [ [:], db_archive ] )
    database = UNTAR.out.untar.map { untarred -> untarred[1] }

    // Classify with Kraken2, then feed its report into Bracken.
    KRAKEN2_KRAKEN2 ( reads, database )
    BRACKEN_BRACKEN ( KRAKEN2_KRAKEN2.out.txt, database )
}
// Single-end reads with Bracken settings supplied through the meta map
// (threshold, taxonomic_level, read_length).
workflow test_bracken_bracken_single_end_custom_args {

    sample_meta = [ id:'test', single_end:true, threshold:0, taxonomic_level:'G', read_length:100 ]
    fastq_files = [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
    reads       = [ sample_meta, fastq_files ]

    // Unpack the combined kraken2/Bracken database and keep only the
    // second element of each emitted tuple.
    db_archive = file(params.test_data['sarscov2']['genome']['kraken2_bracken_tar_gz'], checkIfExists: true)
    UNTAR ( [ [:], db_archive ] )
    database = UNTAR.out.untar.map { untarred -> untarred[1] }

    // Classify with Kraken2, then feed its report into Bracken.
    KRAKEN2_KRAKEN2 ( reads, database )
    BRACKEN_BRACKEN ( KRAKEN2_KRAKEN2.out.txt, database )
}
// Paired-end reads, no Bracken overrides in the meta map: the module falls
// back to its default options.
workflow test_bracken_bracken_paired_end_default_args {

    sample_meta = [ id:'test', single_end:false ]
    fastq_files = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]
    reads       = [ sample_meta, fastq_files ]

    // Unpack the combined kraken2/Bracken database and keep only the
    // second element of each emitted tuple.
    db_archive = file(params.test_data['sarscov2']['genome']['kraken2_bracken_tar_gz'], checkIfExists: true)
    UNTAR ( [ [:], db_archive ] )
    database = UNTAR.out.untar.map { untarred -> untarred[1] }

    // Classify with Kraken2, then feed its report into Bracken.
    KRAKEN2_KRAKEN2 ( reads, database )
    BRACKEN_BRACKEN ( KRAKEN2_KRAKEN2.out.txt, database )
}
// Paired-end reads with Bracken settings supplied through the meta map
// (threshold, taxonomic_level, read_length).
workflow test_bracken_bracken_paired_end_custom_args {

    sample_meta = [ id:'test', single_end:false, threshold:0, taxonomic_level:'G', read_length:100 ]
    fastq_files = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]
    reads       = [ sample_meta, fastq_files ]

    // Unpack the combined kraken2/Bracken database and keep only the
    // second element of each emitted tuple.
    db_archive = file(params.test_data['sarscov2']['genome']['kraken2_bracken_tar_gz'], checkIfExists: true)
    UNTAR ( [ [:], db_archive ] )
    database = UNTAR.out.untar.map { untarred -> untarred[1] }

    // Classify with Kraken2, then feed its report into Bracken.
    KRAKEN2_KRAKEN2 ( reads, database )
    BRACKEN_BRACKEN ( KRAKEN2_KRAKEN2.out.txt, database )
}

View file

@ -0,0 +1,5 @@
// Test-run configuration applied to every process.
process {
    // Publish outputs under <params.outdir>/<dir>, where <dir> is derived from
    // the process name: take the last ':'-separated component, keep the part
    // before its first '_', and lower-case it (BRACKEN_BRACKEN -> "bracken").
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,43 @@
# Expected outputs for the four bracken/bracken test workflows. Each entry
# runs one workflow entry point and checks the published report and
# versions.yml by md5sum.

# Single-end reads, default options: the report is named with the default
# taxonomic level 'S'.
- name: bracken bracken test_bracken_bracken_single_end_default_args
command: nextflow run tests/modules/bracken/bracken -entry test_bracken_bracken_single_end_default_args -c tests/config/nextflow.config
tags:
- bracken/bracken
- bracken
files:
- path: output/bracken/test_S.tsv
md5sum: 4a21ae14ff8a0311d55f139af5247838
- path: output/bracken/versions.yml
md5sum: ab8b1550f84a99bae80f050fe718abd0
# Single-end reads, options set through the meta map: taxonomic_level 'G'
# yields a report named test_G.tsv.
- name: bracken bracken test_bracken_bracken_single_end_custom_args
command: nextflow run tests/modules/bracken/bracken -entry test_bracken_bracken_single_end_custom_args -c tests/config/nextflow.config
tags:
- bracken/bracken
- bracken
files:
- path: output/bracken/test_G.tsv
md5sum: f609b09d6edb5ebc1ea1435d1dd46cde
- path: output/bracken/versions.yml
md5sum: af87e8d4c42fbcb0469ab13912b8a9bd
# Paired-end reads, default options: the expected report checksum is the same
# as in the single-end default test.
- name: bracken bracken test_bracken_bracken_paired_end_default_args
command: nextflow run tests/modules/bracken/bracken -entry test_bracken_bracken_paired_end_default_args -c tests/config/nextflow.config
tags:
- bracken/bracken
- bracken
files:
- path: output/bracken/test_S.tsv
md5sum: 4a21ae14ff8a0311d55f139af5247838
- path: output/bracken/versions.yml
md5sum: 4602111eb25bd19a7f9d725acc5921f6
# Paired-end reads, options set through the meta map: the expected report
# checksum is the same as in the single-end custom test.
- name: bracken bracken test_bracken_bracken_paired_end_custom_args
command: nextflow run tests/modules/bracken/bracken -entry test_bracken_bracken_paired_end_custom_args -c tests/config/nextflow.config
tags:
- bracken/bracken
- bracken
files:
- path: output/bracken/test_G.tsv
md5sum: f609b09d6edb5ebc1ea1435d1dd46cde
- path: output/bracken/versions.yml
md5sum: d4618b01df5ac09cc366fe2ae7c13f06