mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-12-22 15:08:17 +00:00
Add centrifuge classification
This commit is contained in:
parent
358b89a4c6
commit
3ff54e620e
9 changed files with 179 additions and 8 deletions
|
@ -121,4 +121,14 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
// Per-process configuration for the CENTRIFUGE module.
withName: CENTRIFUGE {
    publishDir = [
        // One output directory per database so runs against several
        // databases do not overwrite each other.
        path: { "${params.outdir}/centrifuge/${meta.db_name}" },
        mode: 'copy',
        // The module declares '*.mapped.fastq{,.1,.2}.gz' and
        // '*.unmapped.fastq{,.1,.2}.gz' outputs; the paired-end variants end
        // in '.fastq.1.gz' / '.fastq.2.gz' and are not matched by a bare
        // 'fastq.gz' alternative, so they are listed explicitly.
        pattern: '*.{fastq.gz,fastq.1.gz,fastq.2.gz,txt}'
    ]
    // Extra command-line arguments come from the database's own parameters.
    ext.args = { "${meta.db_params}" }
    // Output files are named after both the sample and the database.
    ext.prefix = { "${meta.id}-${meta.db_name}" }
}
|
||||
|
||||
}
|
||||
|
|
|
@ -29,5 +29,6 @@ params {
|
|||
run_kraken2 = true
|
||||
run_malt = true
|
||||
shortread_clipmerge = true
|
||||
run_centrifuge = true
|
||||
|
||||
}
|
||||
|
|
|
@ -29,6 +29,9 @@
|
|||
"porechop": {
|
||||
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
|
||||
}
|
||||
"centrifuge": {
|
||||
"git_sha": "ea41a8a6f761b9993d857570e872abaae3fea555"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
63
modules/nf-core/modules/centrifuge/main.nf
generated
Normal file
63
modules/nf-core/modules/centrifuge/main.nf
generated
Normal file
|
@ -0,0 +1,63 @@
|
|||
// Classify reads with Centrifuge against a database supplied as a .tar.gz
// archive, then derive a Kraken-style report from the classification results.
//
// Inputs:
//   meta, reads     - sample map (meta.id, meta.single_end are read) and its
//                     FastQ file(s); reads[0]/reads[1] are used when paired.
//   db              - database archive; it is extracted in the work directory
//                     and the index basename is taken to be the archive name
//                     without its '.tar.gz' suffix.
//   save_unaligned  - when true, write unclassified reads to gzipped FastQ.
//   save_aligned    - when true, write classified reads to gzipped FastQ.
//   sam_format      - when true, pass --out-fmt 'sam' to centrifuge.
process CENTRIFUGE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format

    output:
    // The globs are anchored on the leading dot: an unanchored '*report.txt'
    // also matches '<prefix>.kreport.txt', which would put both files into the
    // 'report' channel.
    tuple val(meta), path('*.report.txt')                , emit: report
    tuple val(meta), path('*.results.txt')               , emit: results
    tuple val(meta), path('*.kreport.txt')               , emit: kreport
    // NOTE(review): the command below always writes to '-S <prefix>.results.txt',
    // also when --out-fmt 'sam' is set, so it looks like no '*.sam' file is ever
    // created by this script - confirm against the Centrifuge manual.
    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Read input flags: -U for single-end, -1/-2 for a pair.
    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
    // Index basename expected inside the extracted archive.
    def db_name = db.toString().replace(".tar.gz","")
    def unaligned = ''
    def aligned = ''
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        // The '-conc' variants are used for paired input; the output globs
        // above expect the resulting files to carry a '.1' / '.2' infix.
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    tar -xf $db
    centrifuge \\
        -x $db_name \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args
    centrifuge-kreport -x $db_name ${prefix}.results.txt > ${prefix}.kreport.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version  | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
|
73
modules/nf-core/modules/centrifuge/meta.yml
generated
Normal file
73
modules/nf-core/modules/centrifuge/meta.yml
generated
Normal file
|
@ -0,0 +1,73 @@
|
|||
# Module metadata for the CENTRIFUGE process; the inputs and outputs listed
# here mirror the process definition in main.nf.
name: centrifuge
description: Classifies metagenomic sequence data
keywords:
  - classify
  - metagenomics
  - fastq
  - db
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - db:
      # The process receives an archive and extracts it itself, so this is a
      # file rather than a directory.
      type: file
      description: Centrifuge database in .tar.gz format
      pattern: "*.tar.gz"
  - save_unaligned:
      type: value
      description: If true unmapped fastq files are saved
  - save_aligned:
      type: value
      description: If true mapped fastq files are saved
  - sam_format:
      type: value
      description: If true centrifuge is run with --out-fmt 'sam'

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: |
        File containing a classification summary
      pattern: "*.report.txt"
  - results:
      type: file
      description: |
        File containing classification results
      pattern: "*.results.txt"
  - kreport:
      type: file
      description: |
        File containing a kraken-style report generated from the
        centrifuge results file.
      pattern: "*.kreport.txt"
  - sam:
      type: file
      description: Optional classification output in SAM format
      pattern: "*.sam"
  - fastq_unmapped:
      type: file
      description: Unmapped fastq files (one file for single-end, .1/.2 files for paired-end)
      pattern: "*.unmapped.fastq{,.1,.2}.gz"
  - fastq_mapped:
      type: file
      description: Mapped fastq files (one file for single-end, .1/.2 files for paired-end)
      pattern: "*.mapped.fastq{,.1,.2}.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@sofstam"
  - "@jfy133"
  - "@sateeshperi"
|
|
@ -56,7 +56,7 @@ params {
|
|||
|
||||
// FASTQ preprocessing
|
||||
shortread_clipmerge = false
|
||||
shortread_excludeunmerged = true
|
||||
shortread_excludeunmerged = true
|
||||
longread_clip = false
|
||||
|
||||
// MALT
|
||||
|
@ -65,6 +65,12 @@ params {
|
|||
|
||||
// kraken2
|
||||
run_kraken2 = false
|
||||
|
||||
// centrifuge
|
||||
run_centrifuge = false
|
||||
save_unaligned = false
|
||||
save_aligned = false
|
||||
sam_format = false
|
||||
}
|
||||
|
||||
// Load base.config by default for all pipelines
|
||||
|
|
|
@ -21,7 +21,7 @@ workflow DB_CHECK {
|
|||
|
||||
ch_dbs_for_untar = parsed_samplesheet
|
||||
.branch {
|
||||
untar: it[1].toString().endsWith(".tar.gz")
|
||||
untar: it[1].toString().endsWith(".tar.gz") && it[0]['tool']!="centrifuge"
|
||||
skip: true
|
||||
}
|
||||
|
||||
|
|
|
@ -67,8 +67,9 @@ def create_fastq_channel(LinkedHashMap row) {
|
|||
if (!file(row.fastq_2).exists()) {
|
||||
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
|
||||
}
|
||||
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||
}
|
||||
|
||||
}
|
||||
return fastq_meta
|
||||
}
|
||||
|
|
|
@ -58,7 +58,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/
|
|||
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
||||
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
||||
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
||||
|
||||
include { CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/main'
|
||||
|
||||
/*
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -149,9 +149,10 @@ workflow TAXPROFILER {
|
|||
.combine(DB_CHECK.out.dbs)
|
||||
.dump(tag: "reads_plus_db")
|
||||
.branch {
|
||||
malt: it[2]['tool'] == 'malt'
|
||||
kraken2: it[2]['tool'] == 'kraken2'
|
||||
unknown: true
|
||||
malt: it[2]['tool'] == 'malt'
|
||||
kraken2: it[2]['tool'] == 'kraken2'
|
||||
centrifuge: it[2]['tool'] == 'centrifuge'
|
||||
unknown: true
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -184,6 +185,15 @@ workflow TAXPROFILER {
|
|||
db: it[3]
|
||||
}
|
||||
|
||||
// Centrifuge is run once per element of the 'centrifuge' branch; each element
// is split into the two channels the module takes as separate inputs.
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
    .dump(tag: "input for centrifuge")
    .multiMap { combo ->
        // Elements 0 and 2 are added together to form the map emitted next to
        // the reads (element 1); element 3 is forwarded as the database.
        reads: [ combo[0] + combo[2], combo[1] ]
        db: combo[3]
    }
|
||||
|
||||
//
|
||||
// RUN PROFILING
|
||||
//
|
||||
|
@ -195,6 +205,10 @@ workflow TAXPROFILER {
|
|||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||
}
|
||||
|
||||
// Centrifuge profiling is opt-in via params.run_centrifuge.
if ( params.run_centrifuge ) {
    CENTRIFUGE (
        ch_input_for_centrifuge.reads,
        ch_input_for_centrifuge.db,
        params.save_unaligned,
        params.save_aligned,
        params.sam_format
    )
}
|
||||
|
||||
//
|
||||
// MODULE: MultiQC
|
||||
//
|
||||
|
|
Loading…
Reference in a new issue