mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-25 18:19:55 +00:00
Merge pull request #31 from genomic-medicine-sweden/classification_centrifuge
Add centrifuge classification (centrifuge/centrifuge module)
This commit is contained in:
commit
5f24f94391
12 changed files with 307 additions and 5 deletions
|
@ -198,4 +198,14 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: CENTRIFUGE_CENTRIFUGE {
    // Publish results into a per-database sub-directory of the centrifuge output folder.
    publishDir = [
        path: { "${params.outdir}/centrifuge/${meta.db_name}" },
        mode: params.publish_dir_mode,
        // The module declares '*.sam' and '*.fastq{,.1,.2}.gz' outputs in addition to the
        // '*.txt' report/results files. The previous '*.{fastq.gz,txt}' pattern could not
        // match names ending in '.fastq.1.gz' / '.fastq.2.gz' or '.sam', so those files
        // would never have been published.
        pattern: '*.{txt,sam,gz}'
    ]
    // Extra command-line arguments come from the database entry's metadata.
    ext.args = { "${meta.db_params}" }
    // Output files are named after sample id, run accession and database name.
    ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
}
|
||||
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ params {
|
|||
run_kraken2 = true
|
||||
run_malt = true
|
||||
run_metaphlan3 = true
|
||||
run_centrifuge = true
|
||||
shortread_clipmerge = true
|
||||
longread_clip = false
|
||||
shortread_complexityfilter = true
|
||||
|
|
|
@ -12,6 +12,9 @@
|
|||
"cat/fastq": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
"centrifuge/centrifuge": {
|
||||
"git_sha": "d2726fcf75063960f06b36d2229a4c0966614108"
|
||||
},
|
||||
"custom/dumpsoftwareversions": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
|
|
61
modules/nf-core/modules/centrifuge/centrifuge/main.nf
generated
Normal file
61
modules/nf-core/modules/centrifuge/centrifuge/main.nf
generated
Normal file
|
@ -0,0 +1,61 @@
|
|||
// Classify reads with centrifuge against an index found inside the `db` directory.
//
// Inputs:
//   meta, reads     sample metadata map and FastQ file(s); `meta.single_end` selects
//                   `-U <reads>` versus `-1 <reads[0]> -2 <reads[1]>`
//   db              directory searched (following symlinks) for the `*.1.cf` index file
//   save_unaligned  when true, adds `--un-gz` (single-end) or `--un-conc-gz` (paired-end)
//   save_aligned    when true, adds `--al-gz` (single-end) or `--al-conc-gz` (paired-end)
//   sam_format      when true, adds `--out-fmt 'sam'`
//
// Outputs: `*report.txt`, `*results.txt`, optional `*.sam`, optional mapped/unmapped
// gzipped FastQ files, and `versions.yml`.
process CENTRIFUGE_CENTRIFUGE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format

    output:
    tuple val(meta), path('*report.txt')                 , emit: report
    tuple val(meta), path('*results.txt')                , emit: results
    // NOTE(review): the script below always writes `-S` to `${prefix}.results.txt` and never
    // names a `*.sam` file, so this channel looks like it stays empty even when
    // `sam_format` is true - confirm whether that is intended.
    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
    // The `{,.1,.2}` alternatives accept both a single file and per-mate `.1` / `.2` files.
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
    def unaligned = ''
    def aligned = ''
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    ## '-not -name "._*"' keeps Mac OSX "._*" metadata files out of the index search
    ## The sed expression strips only a literal trailing ".1.cf" to get the index basename
    db_name=\$(find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1\\.cf\$//')
    centrifuge \\
        -x "\$db_name" \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
|
66
modules/nf-core/modules/centrifuge/centrifuge/meta.yml
generated
Normal file
66
modules/nf-core/modules/centrifuge/centrifuge/meta.yml
generated
Normal file
|
@ -0,0 +1,66 @@
|
|||
# Module description for CENTRIFUGE_CENTRIFUGE; keep in sync with the inputs and
# outputs declared in the accompanying main.nf.
name: centrifuge_centrifuge
description: Classifies metagenomic sequence data
keywords:
  - classify
  - metagenomics
  - fastq
  - db
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - db:
      type: directory
      description: Path to directory containing centrifuge database files
  - save_unaligned:
      type: value
      description: If true, unmapped fastq files are saved
  - save_aligned:
      type: value
      description: If true, mapped fastq files are saved
  - sam_format:
      type: value
      description: If true, centrifuge is run with `--out-fmt 'sam'`
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: |
        File containing a classification summary
      pattern: "*report.txt"
  - results:
      type: file
      description: |
        File containing classification results
      pattern: "*results.txt"
  - sam:
      type: file
      description: Optional output file in SAM format
      pattern: "*.sam"
  - fastq_unmapped:
      type: file
      description: Unmapped fastq files
      pattern: "*.unmapped.fastq{,.1,.2}.gz"
  - fastq_mapped:
      type: file
      description: Mapped fastq files
      pattern: "*.mapped.fastq{,.1,.2}.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@sofstam"
  - "@jfy133"
  - "@sateeshperi"
|
|
@ -84,6 +84,11 @@ params {
|
|||
// kraken2
|
||||
run_kraken2 = false
|
||||
|
||||
// centrifuge
|
||||
run_centrifuge = false
|
||||
centrifuge_save_unaligned = false
|
||||
centrifuge_save_aligned = false
|
||||
centrifuge_sam_format = false
|
||||
// metaphlan3
|
||||
run_metaphlan3 = false
|
||||
}
|
||||
|
|
|
@ -281,6 +281,18 @@
|
|||
"run_kraken2": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"run_centrifuge": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"centrifuge_save_unaligned": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"centrifuge_save_aligned": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"centrifuge_sam_format": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"run_metaphlan3": {
|
||||
"type": "boolean",
|
||||
"description": "Enable MetaPhlAn for taxonomic profiling"
|
||||
|
|
61
nf-core/modules/centrifuge/centrifuge/main.nf
Normal file
61
nf-core/modules/centrifuge/centrifuge/main.nf
Normal file
|
@ -0,0 +1,61 @@
|
|||
// Classify reads with centrifuge against an index found inside the `db` directory.
//
// Inputs:
//   meta, reads     sample metadata map and FastQ file(s); `meta.single_end` selects
//                   `-U <reads>` versus `-1 <reads[0]> -2 <reads[1]>`
//   db              directory searched (following symlinks) for the `*.1.cf` index file
//   save_unaligned  when true, adds `--un-gz` (single-end) or `--un-conc-gz` (paired-end)
//   save_aligned    when true, adds `--al-gz` (single-end) or `--al-conc-gz` (paired-end)
//   sam_format      when true, adds `--out-fmt 'sam'`
//
// Outputs: `*report.txt`, `*results.txt`, optional `*.sam`, optional mapped/unmapped
// gzipped FastQ files, and `versions.yml`.
process CENTRIFUGE_CENTRIFUGE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format

    output:
    tuple val(meta), path('*report.txt')                 , emit: report
    tuple val(meta), path('*results.txt')                , emit: results
    // NOTE(review): the script below always writes `-S` to `${prefix}.results.txt` and never
    // names a `*.sam` file, so this channel looks like it stays empty even when
    // `sam_format` is true - confirm whether that is intended.
    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
    // The `{,.1,.2}` alternatives accept both a single file and per-mate `.1` / `.2` files.
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
    def unaligned = ''
    def aligned = ''
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    ## '-not -name "._*"' keeps Mac OSX "._*" metadata files out of the index search
    ## The sed expression strips only a literal trailing ".1.cf" to get the index basename
    db_name=\$(find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1\\.cf\$//')
    centrifuge \\
        -x "\$db_name" \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
|
66
nf-core/modules/centrifuge/centrifuge/meta.yml
Normal file
66
nf-core/modules/centrifuge/centrifuge/meta.yml
Normal file
|
@ -0,0 +1,66 @@
|
|||
# Module description for CENTRIFUGE_CENTRIFUGE; keep in sync with the inputs and
# outputs declared in the accompanying main.nf.
name: centrifuge_centrifuge
description: Classifies metagenomic sequence data
keywords:
  - classify
  - metagenomics
  - fastq
  - db
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - db:
      type: directory
      description: Path to directory containing centrifuge database files
  - save_unaligned:
      type: value
      description: If true, unmapped fastq files are saved
  - save_aligned:
      type: value
      description: If true, mapped fastq files are saved
  - sam_format:
      type: value
      description: If true, centrifuge is run with `--out-fmt 'sam'`
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: |
        File containing a classification summary
      pattern: "*report.txt"
  - results:
      type: file
      description: |
        File containing classification results
      pattern: "*results.txt"
  - sam:
      type: file
      description: Optional output file in SAM format
      pattern: "*.sam"
  - fastq_unmapped:
      type: file
      description: Unmapped fastq files
      pattern: "*.unmapped.fastq{,.1,.2}.gz"
  - fastq_mapped:
      type: file
      description: Mapped fastq files
      pattern: "*.mapped.fastq{,.1,.2}.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@sofstam"
  - "@jfy133"
  - "@sateeshperi"
|
|
@ -22,7 +22,7 @@ workflow DB_CHECK {
|
|||
|
||||
ch_dbs_for_untar = parsed_samplesheet
|
||||
.branch {
|
||||
untar: it[1].toString().endsWith(".tar.gz") && it[0]['tool'] != 'centrifuge'
|
||||
untar: it[1].toString().endsWith(".tar.gz")
|
||||
skip: true
|
||||
}
|
||||
|
||||
|
|
|
@ -63,13 +63,12 @@ def create_fastq_channel(LinkedHashMap row) {
|
|||
if (!file(row.fastq_2).exists()) {
|
||||
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
|
||||
}
|
||||
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||
}
|
||||
|
||||
}
|
||||
return fastq_meta
|
||||
}
|
||||
|
||||
// Function to get list of [ meta, fasta ]
|
||||
}// Function to get list of [ meta, fasta ]
|
||||
def create_fasta_channel(LinkedHashMap row) {
|
||||
def meta = [:]
|
||||
meta.id = row.sample
|
||||
|
|
|
@ -61,6 +61,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/
|
|||
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
||||
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
||||
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
||||
include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/centrifuge/main'
|
||||
include { METAPHLAN3 } from '../modules/nf-core/modules/metaphlan3/main'
|
||||
|
||||
/*
|
||||
|
@ -137,6 +138,7 @@ workflow TAXPROFILER {
|
|||
malt: it[2]['tool'] == 'malt'
|
||||
kraken2: it[2]['tool'] == 'kraken2'
|
||||
metaphlan3: it[2]['tool'] == 'metaphlan3'
|
||||
centrifuge: it[2]['tool'] == 'centrifuge'
|
||||
unknown: true
|
||||
}
|
||||
|
||||
|
@ -170,6 +172,18 @@ workflow TAXPROFILER {
|
|||
db: it[3]
|
||||
}
|
||||
|
||||
// We can run centrifuge one-by-one sample-wise
|
||||
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
|
||||
.dump(tag: "input for centrifuge")
|
||||
.multiMap {
|
||||
it ->
|
||||
reads: [ it[0] + it[2], it[1] ]
|
||||
db: it[3]
|
||||
}
|
||||
|
||||
//
|
||||
// RUN PROFILING
|
||||
//
|
||||
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
|
||||
.multiMap {
|
||||
it ->
|
||||
|
@ -188,6 +202,10 @@ workflow TAXPROFILER {
|
|||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||
}
|
||||
|
||||
if ( params.run_centrifuge ) {
|
||||
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
|
||||
}
|
||||
|
||||
if ( params.run_metaphlan3 ) {
|
||||
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue