mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-10 22:03:09 +00:00
Add centrifuge classification
This commit is contained in:
parent
358b89a4c6
commit
3ff54e620e
9 changed files with 179 additions and 8 deletions
|
@ -121,4 +121,14 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
withName: CENTRIFUGE {
    // Publish Centrifuge text reports and any saved read files under a
    // per-database sub-directory of the pipeline output directory.
    publishDir = [
        path: { "${params.outdir}/centrifuge/${meta.db_name}" },
        mode: 'copy',
        // The module declares paired-end read outputs with a '.1'/'.2' infix
        // ('*.fastq{,.1,.2}.gz'); '*.fastq.gz' alone does not match those,
        // so they are listed explicitly here.
        pattern: '*.{fastq.gz,fastq.1.gz,fastq.2.gz,txt}'
    ]
    // Per-database extra command-line parameters taken from the meta map.
    ext.args = { "${meta.db_params}" }
    // Output prefix combines sample id and database name.
    ext.prefix = { "${meta.id}-${meta.db_name}" }
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,5 +29,6 @@ params {
|
||||||
run_kraken2 = true
|
run_kraken2 = true
|
||||||
run_malt = true
|
run_malt = true
|
||||||
shortread_clipmerge = true
|
shortread_clipmerge = true
|
||||||
|
run_centrifuge = true
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,6 +29,9 @@
|
||||||
"porechop": {
|
"porechop": {
|
||||||
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
|
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
|
||||||
}
|
}
|
||||||
|
"centrifuge": {
|
||||||
|
"git_sha": "ea41a8a6f761b9993d857570e872abaae3fea555"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
63
modules/nf-core/modules/centrifuge/main.nf
generated
Normal file
63
modules/nf-core/modules/centrifuge/main.nf
generated
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
// Runs `centrifuge` on one sample against one database archive, then converts
// the classification results into a Kraken-style report with
// `centrifuge-kreport`.
//
// Inputs:
//   meta, reads     sample map plus FASTQ file(s); the script reads meta.id
//                   and meta.single_end to choose between -U and -1/-2.
//   db              database archive; it is unpacked with `tar -xf` and its
//                   file name minus ".tar.gz" is passed as the index (-x).
//                   NOTE(review): this assumes the index files inside the
//                   archive share the archive's base name - confirm.
//   save_unaligned  when truthy, adds --un-gz / --un-conc-gz.
//   save_aligned    when truthy, adds --al-gz / --al-conc-gz.
//   sam_format      when truthy, adds --out-fmt 'sam'.
//
// Outputs: report, results, kreport, optional sam / fastq_mapped /
// fastq_unmapped, and versions.yml.
process CENTRIFUGE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format

    output:
    // '*.report.txt' (with the leading dot) keeps '<prefix>.kreport.txt' out of
    // this channel; the looser '*report.txt' would match both files.
    tuple val(meta), path('*.report.txt')                , emit: report
    tuple val(meta), path('*results.txt')                , emit: results
    tuple val(meta), path('*kreport.txt')                , emit: kreport
    // NOTE(review): the command below always writes to '<prefix>.results.txt'
    // via -S, so confirm whether a '*.sam' file is ever produced.
    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Single-end reads go in via -U, paired-end via -1/-2.
    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
    // Index base name handed to -x: the archive file name without ".tar.gz".
    def db_name = db.toString().replace(".tar.gz","")
    def unaligned = ''
    def aligned = ''
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        // Paired mode uses the "-conc" variants; the output globs above allow
        // for a '.1'/'.2' infix in the resulting file names.
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    tar -xf $db
    centrifuge \\
        -x $db_name \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args
    centrifuge-kreport -x $db_name ${prefix}.results.txt > ${prefix}.kreport.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
|
73
modules/nf-core/modules/centrifuge/meta.yml
generated
Normal file
73
modules/nf-core/modules/centrifuge/meta.yml
generated
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
# Module metadata for the CENTRIFUGE process; inputs and outputs below mirror
# the declarations in the accompanying main.nf.
name: centrifuge
description: Classifies metagenomic sequence data
keywords:
  - classify
  - metagenomics
  - fastq
  - db
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  # The database is passed as a tar archive (the process unpacks it itself),
  # so it is a file rather than a directory.
  - db:
      type: file
      description: Centrifuge database in .tar.gz format
      pattern: "*.tar.gz"
  - save_unaligned:
      type: value
      description: If true unmapped fastq files are saved
  - save_aligned:
      type: value
      description: If true mapped fastq files are saved
  - sam_format:
      type: value
      description: If true centrifuge is run with --out-fmt 'sam'
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: |
        File containing a classification summary
      pattern: "*.report.txt"
  - results:
      type: file
      description: |
        File containing classification results
      pattern: "*.results.txt"
  - kreport:
      type: file
      description: |
        File containing kraken-style report from centrifuge
        out files.
      pattern: "*.kreport.txt"
  - sam:
      type: file
      description: Optional output in SAM format
      pattern: "*.sam"
  - fastq_unmapped:
      type: file
      description: Unmapped fastq files
      pattern: "*.unmapped.fastq{,.1,.2}.gz"
  - fastq_mapped:
      type: file
      description: Mapped fastq files
      pattern: "*.mapped.fastq{,.1,.2}.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@sofstam"
  - "@jfy133"
  - "@sateeshperi"
|
|
@ -56,7 +56,7 @@ params {
|
||||||
|
|
||||||
// FASTQ preprocessing
|
// FASTQ preprocessing
|
||||||
shortread_clipmerge = false
|
shortread_clipmerge = false
|
||||||
shortread_excludeunmerged = true
|
shortread_excludeunmerged = true
|
||||||
longread_clip = false
|
longread_clip = false
|
||||||
|
|
||||||
// MALT
|
// MALT
|
||||||
|
@ -65,6 +65,12 @@ params {
|
||||||
|
|
||||||
// kraken2
|
// kraken2
|
||||||
run_kraken2 = false
|
run_kraken2 = false
|
||||||
|
|
||||||
|
// centrifuge
|
||||||
|
run_centrifuge = false
|
||||||
|
save_unaligned = false
|
||||||
|
save_aligned = false
|
||||||
|
sam_format = false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load base.config by default for all pipelines
|
// Load base.config by default for all pipelines
|
||||||
|
|
|
@ -21,7 +21,7 @@ workflow DB_CHECK {
|
||||||
|
|
||||||
ch_dbs_for_untar = parsed_samplesheet
|
ch_dbs_for_untar = parsed_samplesheet
|
||||||
.branch {
|
.branch {
|
||||||
untar: it[1].toString().endsWith(".tar.gz")
|
untar: it[1].toString().endsWith(".tar.gz") && it[0]['tool']!="centrifuge"
|
||||||
skip: true
|
skip: true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -67,8 +67,9 @@ def create_fastq_channel(LinkedHashMap row) {
|
||||||
if (!file(row.fastq_2).exists()) {
|
if (!file(row.fastq_2).exists()) {
|
||||||
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
|
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
|
||||||
}
|
}
|
||||||
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
return fastq_meta
|
return fastq_meta
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,7 +58,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/
|
||||||
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
||||||
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
||||||
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
||||||
|
include { CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/main'
|
||||||
|
|
||||||
/*
|
/*
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -149,9 +149,10 @@ workflow TAXPROFILER {
|
||||||
.combine(DB_CHECK.out.dbs)
|
.combine(DB_CHECK.out.dbs)
|
||||||
.dump(tag: "reads_plus_db")
|
.dump(tag: "reads_plus_db")
|
||||||
.branch {
|
.branch {
|
||||||
malt: it[2]['tool'] == 'malt'
|
malt: it[2]['tool'] == 'malt'
|
||||||
kraken2: it[2]['tool'] == 'kraken2'
|
kraken2: it[2]['tool'] == 'kraken2'
|
||||||
unknown: true
|
centrifuge: it[2]['tool'] == 'centrifuge'
|
||||||
|
unknown: true
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -184,6 +185,15 @@ workflow TAXPROFILER {
|
||||||
db: it[3]
|
db: it[3]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We can run centrifuge one-by-one sample-wise
|
||||||
|
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
|
||||||
|
.dump(tag: "input for centrifuge")
|
||||||
|
.multiMap {
|
||||||
|
it ->
|
||||||
|
reads: [ it[0] + it[2], it[1] ]
|
||||||
|
db: it[3]
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// RUN PROFILING
|
// RUN PROFILING
|
||||||
//
|
//
|
||||||
|
@ -195,6 +205,10 @@ workflow TAXPROFILER {
|
||||||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( params.run_centrifuge ) {
|
||||||
|
CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.save_unaligned, params.save_aligned, params.sam_format )
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// MODULE: MultiQC
|
// MODULE: MultiQC
|
||||||
//
|
//
|
||||||
|
|
Loading…
Reference in a new issue