Add centrifuge module (#1410)

* Add centrifuge module

* Add centrifuge module

* Add centrifuge module

* Add centrifuge module
This commit is contained in:
Sofia Stamouli 2022-03-18 15:39:27 +01:00 committed by GitHub
parent f080015754
commit ea41a8a6f7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 204 additions and 4 deletions

View file

@ -0,0 +1,63 @@
// Classify reads with Centrifuge against a database shipped as a tarball,
// then derive a Kraken-style report from the classification results.
//
// Inputs:
//   meta, reads     - sample meta map (reads meta.id and meta.single_end) and FastQ file(s)
//   db              - database archive; it is unpacked with `tar -xf` and the index
//                     basename is the archive name with ".tar.gz" removed
//   save_unaligned  - when true, unclassified reads are written as gzipped FastQ
//   save_aligned    - when true, classified reads are written as gzipped FastQ
//   sam_format      - when true, `--out-fmt 'sam'` is passed to centrifuge
process CENTRIFUGE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format

    output:
    // The globs are anchored on the leading dot: a bare '*report.txt' would also
    // capture '<prefix>.kreport.txt' and leak the Kraken-style report into `report`.
    tuple val(meta), path('*.report.txt')                 , emit: report
    tuple val(meta), path('*.results.txt')                , emit: results
    tuple val(meta), path('*.kreport.txt')                , emit: kreport
    // NOTE(review): the script always passes `-S <prefix>.results.txt`, so it looks like
    // no '*.sam' file is produced even when sam_format is true - confirm.
    tuple val(meta), path('*.sam')                        , optional: true, emit: sam
    // Single-end runs request '<prefix>.(un)mapped.fastq.gz'; the paired-end test
    // expectations list '<prefix>.unmapped.fastq.1.gz' and '.2.gz'.
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')    , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz')  , optional: true, emit: fastq_unmapped
    path "versions.yml"                                   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
    // Index basename handed to `-x`: the archive name without its extension.
    def db_name = db.toString().replace(".tar.gz","")
    def unaligned = ''
    def aligned = ''
    // Paired-end data uses the `-conc` variants of the read-saving options.
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    tar -xf $db
    centrifuge \\
        -x $db_name \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args
    centrifuge-kreport -x $db_name ${prefix}.results.txt > ${prefix}.kreport.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,73 @@
# Module description for CENTRIFUGE: tool metadata plus one entry for every
# input and output channel declared by the process.
name: centrifuge
description: Classifies metagenomic sequence data
keywords:
  - classify
  - metagenomics
  - fastq
  - db
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  # The database is passed as an archive file that the process unpacks itself.
  - db:
      type: file
      description: Centrifuge database in .tar.gz format
      pattern: "*.tar.gz"
  - save_unaligned:
      type: boolean
      description: If true unmapped fastq files are saved
  - save_aligned:
      type: boolean
      description: If true mapped fastq files are saved
  - sam_format:
      type: boolean
      description: If true centrifuge is run with the option --out-fmt 'sam'
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: |
        File containing a classification summary
      pattern: "*.report.txt"
  - results:
      type: file
      description: |
        File containing classification results
      pattern: "*.results.txt"
  - kreport:
      type: file
      description: |
        File containing kraken-style report from centrifuge
        out files.
      pattern: "*.kreport.txt"
  - sam:
      type: file
      description: Optional output collecting any file matching *.sam
      pattern: "*.sam"
  # Paired-end runs write the saved reads as <prefix>.(un)mapped.fastq.1.gz / .2.gz.
  - fastq_unmapped:
      type: file
      description: Unmapped fastq files
      pattern: "*.unmapped.fastq{,.1,.2}.gz"
  - fastq_mapped:
      type: file
      description: Mapped fastq files
      pattern: "*.mapped.fastq{,.1,.2}.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@sofstam"
  - "@jfy133"
  - "@sateeshperi"

View file

@ -202,6 +202,10 @@ bedtools/subtract:
- modules/bedtools/subtract/** - modules/bedtools/subtract/**
- tests/modules/bedtools/subtract/** - tests/modules/bedtools/subtract/**
biobambam/bammarkduplicates2:
- modules/biobambam/bammarkduplicates2/**
- tests/modules/biobambam/bammarkduplicates2/**
biscuit/align: biscuit/align:
- modules/biscuit/index/** - modules/biscuit/index/**
- modules/biscuit/align/** - modules/biscuit/align/**
@ -245,10 +249,6 @@ biscuit/vcf2bed:
- modules/biscuit/vcf2bed/** - modules/biscuit/vcf2bed/**
- tests/modules/biscuit/vcf2bed/** - tests/modules/biscuit/vcf2bed/**
biobambam/bammarkduplicates2:
- modules/biobambam/bammarkduplicates2/**
- tests/modules/biobambam/bammarkduplicates2/**
bismark/align: bismark/align:
- modules/bismark/align/** - modules/bismark/align/**
- modules/bismark/genomepreparation/** - modules/bismark/genomepreparation/**
@ -379,6 +379,10 @@ cellranger/mkref:
- modules/cellranger/gtf/** - modules/cellranger/gtf/**
- tests/modules/cellranger/gtf/** - tests/modules/cellranger/gtf/**
centrifuge:
- modules/centrifuge/**
- tests/modules/centrifuge/**
checkm/lineagewf: checkm/lineagewf:
- modules/checkm/lineagewf/** - modules/checkm/lineagewf/**
- tests/modules/checkm/lineagewf/** - tests/modules/checkm/lineagewf/**

View file

@ -0,0 +1,33 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { CENTRIFUGE } from '../../../modules/centrifuge/main.nf'
// Runs CENTRIFUGE on one single-end FastQ file, saving only the unaligned reads.
workflow test_centrifuge_single_end {

    // One-element read list, as flagged by single_end:true in the meta map
    reads = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]
    sample = [ [ id:'test', single_end:true ], reads ]

    // Database tarball fetched from the test-datasets repository
    database = file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz", checkIfExists: true)

    // Module switches, in the order the process declares them
    keep_unaligned = true
    keep_aligned   = false
    as_sam         = false

    CENTRIFUGE ( sample, database, keep_unaligned, keep_aligned, as_sam )
}
// Runs CENTRIFUGE on a pair of FastQ files, saving only the unaligned reads.
workflow test_centrifuge_paired_end {

    // Two-element read list (mate 1, mate 2), as flagged by single_end:false
    reads = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]
    sample = [ [ id:'test', single_end:false ], reads ]

    // Database tarball fetched from the test-datasets repository
    database = file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz", checkIfExists: true)

    // Module switches, in the order the process declares them
    keep_unaligned = true
    keep_aligned   = false
    as_sam         = false

    CENTRIFUGE ( sample, database, keep_unaligned, keep_aligned, as_sam )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish each task's outputs under <outdir>/<tool>, where <tool> is derived
    // from the process name: take the last ':'-separated component, keep the part
    // before the first '_', and lower-case it.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}

View file

@ -0,0 +1,22 @@
# Test cases for the CENTRIFUGE module: one per test workflow entry point.
# Each case lists output paths only; no checksums or content checks are given.

# Single-end run: unaligned reads are saved as one gzipped FastQ file.
- name: centrifuge test_centrifuge_single_end
  command: >-
    nextflow run tests/modules/centrifuge
    -entry test_centrifuge_single_end
    -c tests/config/nextflow.config
  tags:
    - centrifuge
  files:
    - path: output/centrifuge/test.kreport.txt
    - path: output/centrifuge/test.report.txt
    - path: output/centrifuge/test.results.txt
    - path: output/centrifuge/test.unmapped.fastq.gz
    - path: output/centrifuge/versions.yml

# Paired-end run: unaligned reads are saved as one gzipped FastQ file per mate.
- name: centrifuge test_centrifuge_paired_end
  command: >-
    nextflow run tests/modules/centrifuge
    -entry test_centrifuge_paired_end
    -c tests/config/nextflow.config
  tags:
    - centrifuge
  files:
    - path: output/centrifuge/test.kreport.txt
    - path: output/centrifuge/test.report.txt
    - path: output/centrifuge/test.results.txt
    - path: output/centrifuge/test.unmapped.fastq.1.gz
    - path: output/centrifuge/test.unmapped.fastq.2.gz
    - path: output/centrifuge/versions.yml