1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 00:42:04 +00:00

Add centrifuge classification

This commit is contained in:
sofstam 2022-03-24 12:51:45 +01:00
parent 358b89a4c6
commit 3ff54e620e
9 changed files with 179 additions and 8 deletions

View file

@ -121,4 +121,14 @@ process {
]
}
withName: CENTRIFUGE {
publishDir = [
path: { "${params.outdir}/centrifuge/${meta.db_name}" },
mode: 'copy',
pattern: '*.{fastq.gz,txt}'
]
ext.args = { "${meta.db_params}" }
ext.prefix = { "${meta.id}-${meta.db_name}" }
}
}

View file

@ -29,5 +29,6 @@ params {
run_kraken2 = true
run_malt = true
shortread_clipmerge = true
run_centrifuge = true
}

View file

@ -29,6 +29,9 @@
"porechop": {
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
}
"centrifuge": {
"git_sha": "ea41a8a6f761b9993d857570e872abaae3fea555"
}
}
}
}
}

View file

@ -0,0 +1,63 @@
process CENTRIFUGE {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
input:
tuple val(meta), path(reads)
path db
val save_unaligned
val save_aligned
val sam_format
output:
tuple val(meta), path('*report.txt') , emit: report
tuple val(meta), path('*results.txt') , emit: results
tuple val(meta), path('*kreport.txt') , emit: kreport
tuple val(meta), path('*.sam') , optional: true, emit: sam
tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped
tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
def db_name = db.toString().replace(".tar.gz","")
def unaligned = ''
def aligned = ''
if (meta.single_end) {
unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
} else {
unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
}
def sam_output = sam_format ? "--out-fmt 'sam'" : ''
"""
tar -xf $db
centrifuge \\
-x $db_name \\
-p $task.cpus \\
$paired \\
--report-file ${prefix}.report.txt \\
-S ${prefix}.results.txt \\
$unaligned \\
$aligned \\
$sam_output \\
$args
centrifuge-kreport -x $db_name ${prefix}.results.txt > ${prefix}.kreport.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
END_VERSIONS
"""
}

View file

@ -0,0 +1,73 @@
name: centrifuge
description: Classifies metagenomic sequence data
keywords:
- classify
- metagenomics
- fastq
- db
tools:
- centrifuge:
description: Centrifuge is a classifier for metagenomic sequences.
homepage: https://ccb.jhu.edu/software/centrifuge/
documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
doi: 10.1101/gr.210641.116
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
- db:
type: directory
description: Centrifuge database in .tar.gz format
pattern: "*.tar.gz"
- save_unaligned:
type: value
description: If true unmapped fastq files are saved
- save_aligned:
type: value
description: If true mapped fastq files are saved
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- report:
type: file
description: |
File containing a classification summary
pattern: "*.{report.txt}"
- results:
type: file
description: |
File containing classification results
pattern: "*.{results.txt}"
- kreport:
type: file
description: |
File containing kraken-style report from centrifuge
out files.
pattern: "*.{kreport.txt}"
- fastq_unmapped:
type: file
description: Unmapped fastq files
pattern: "*.unmapped.fastq.gz"
- fastq_mapped:
type: file
description: Mapped fastq files
pattern: "*.mapped.fastq.gz"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@sofstam"
- "@jfy133"
- "@sateeshperi"

View file

@ -56,7 +56,7 @@ params {
// FASTQ preprocessing
shortread_clipmerge = false
shortread_excludeunmerged = true
shortread_excludeunmerged = true
longread_clip = false
// MALT
@ -65,6 +65,12 @@ params {
// kraken2
run_kraken2 = false
// centrifuge
run_centrifuge = false
save_unaligned = false
save_aligned = false
sam_format = false
}
// Load base.config by default for all pipelines

View file

@ -21,7 +21,7 @@ workflow DB_CHECK {
ch_dbs_for_untar = parsed_samplesheet
.branch {
untar: it[1].toString().endsWith(".tar.gz")
untar: it[1].toString().endsWith(".tar.gz") && it[0]['tool']!="centrifuge"
skip: true
}

View file

@ -67,8 +67,9 @@ def create_fastq_channel(LinkedHashMap row) {
if (!file(row.fastq_2).exists()) {
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
}
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
}
}
return fastq_meta
}

View file

@ -58,7 +58,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
include { CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/main'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -149,9 +149,10 @@ workflow TAXPROFILER {
.combine(DB_CHECK.out.dbs)
.dump(tag: "reads_plus_db")
.branch {
malt: it[2]['tool'] == 'malt'
kraken2: it[2]['tool'] == 'kraken2'
unknown: true
malt: it[2]['tool'] == 'malt'
kraken2: it[2]['tool'] == 'kraken2'
centrifuge: it[2]['tool'] == 'centrifuge'
unknown: true
}
//
@ -184,6 +185,15 @@ workflow TAXPROFILER {
db: it[3]
}
// We can run centrifuge one-by-one sample-wise
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
.dump(tag: "input for centrifuge")
.multiMap {
it ->
reads: [ it[0] + it[2], it[1] ]
db: it[3]
}
//
// RUN PROFILING
//
@ -195,6 +205,10 @@ workflow TAXPROFILER {
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
}
if ( params.run_centrifuge ) {
CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.save_unaligned, params.save_aligned, params.sam_format )
}
//
// MODULE: MultiQC
//