mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-25 20:29:54 +00:00
Merge pull request #31 from genomic-medicine-sweden/classification_centrifuge
Add centrifuge classification (centrifuge/centrifuge module)
This commit is contained in:
commit
5f24f94391
12 changed files with 307 additions and 5 deletions
|
@ -198,4 +198,14 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Per-process settings for CENTRIFUGE_CENTRIFUGE: results are published under a
// directory named after meta.db_name, extra CLI arguments come from meta.db_params,
// and the output prefix combines sample id, run accession and database name.
withName: CENTRIFUGE_CENTRIFUGE {
    publishDir = [
        path: { "${params.outdir}/centrifuge/${meta.db_name}" },
        mode: params.publish_dir_mode,
        // Match on the final extension: the process declares paired-end read outputs
        // named *.fastq.1.gz / *.fastq.2.gz and an optional *.sam, none of which
        // the previous '*.{fastq.gz,txt}' pattern could match.
        pattern: '*.{txt,sam,gz}'
    ]
    ext.args = { "${meta.db_params}" }
    ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@ params {
|
||||||
run_kraken2 = true
|
run_kraken2 = true
|
||||||
run_malt = true
|
run_malt = true
|
||||||
run_metaphlan3 = true
|
run_metaphlan3 = true
|
||||||
|
run_centrifuge = true
|
||||||
shortread_clipmerge = true
|
shortread_clipmerge = true
|
||||||
longread_clip = false
|
longread_clip = false
|
||||||
shortread_complexityfilter = true
|
shortread_complexityfilter = true
|
||||||
|
|
|
@ -12,6 +12,9 @@
|
||||||
"cat/fastq": {
|
"cat/fastq": {
|
||||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
|
"centrifuge/centrifuge": {
|
||||||
|
"git_sha": "d2726fcf75063960f06b36d2229a4c0966614108"
|
||||||
|
},
|
||||||
"custom/dumpsoftwareversions": {
|
"custom/dumpsoftwareversions": {
|
||||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
|
|
61
modules/nf-core/modules/centrifuge/centrifuge/main.nf
generated
Normal file
61
modules/nf-core/modules/centrifuge/centrifuge/main.nf
generated
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
// Runs `centrifuge` on one sample against one database.
//
// Inputs:
//   meta, reads     - sample metadata map and FastQ file(s); meta.single_end selects
//                     between `-U <reads>` and `-1 <reads[0]> -2 <reads[1]>`
//   db              - path searched (following symlinks) for the index's "*.1.cf" file
//   save_unaligned  - if true, unmapped reads are written to <prefix>.unmapped.fastq.gz
//   save_aligned    - if true, mapped reads are written to <prefix>.mapped.fastq.gz
//   sam_format      - if true, `--out-fmt 'sam'` is passed to centrifuge
//
// Outputs: report, results, optional sam / fastq_mapped / fastq_unmapped, and versions.yml.
process CENTRIFUGE_CENTRIFUGE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format

    output:
    tuple val(meta), path('*report.txt')                 , emit: report
    tuple val(meta), path('*results.txt')                , emit: results
    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
    def unaligned = ''
    def aligned = ''
    // Single-end and paired-end data use different centrifuge options for saving reads
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    ## Derive the index basename from its "*.1.cf" file.
    ## -not -name "._*" keeps Mac OSX metadata files from being picked up.
    ## The sed expression escapes the dots and anchors at end of line so that only the
    ## trailing ".1.cf" suffix is removed, never a look-alike earlier in the path.
    db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1\\.cf\$//'`
    centrifuge \\
        -x \$db_name \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
|
66
modules/nf-core/modules/centrifuge/centrifuge/meta.yml
generated
Normal file
66
modules/nf-core/modules/centrifuge/centrifuge/meta.yml
generated
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
# Module description for CENTRIFUGE_CENTRIFUGE.
# Inputs, outputs and glob patterns below mirror the declarations in the module's main.nf.
name: centrifuge_centrifuge
description: Classifies metagenomic sequence data
keywords:
  - classify
  - metagenomics
  - fastq
  - db
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - db:
      type: directory
      description: Path to directory containing centrifuge database files
  - save_unaligned:
      type: value
      description: If true unmapped fastq files are saved
  - save_aligned:
      type: value
      description: If true mapped fastq files are saved
  - sam_format:
      type: value
      description: If true the `--out-fmt 'sam'` option is passed to centrifuge

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: |
        File containing a classification summary
      pattern: "*report.txt"
  - results:
      type: file
      description: |
        File containing classification results
      pattern: "*results.txt"
  - sam:
      type: file
      description: Optional SAM output file
      pattern: "*.sam"
  - fastq_unmapped:
      type: file
      description: Unmapped fastq files
      pattern: "*.unmapped.fastq{,.1,.2}.gz"
  - fastq_mapped:
      type: file
      description: Mapped fastq files
      pattern: "*.mapped.fastq{,.1,.2}.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@sofstam"
  - "@jfy133"
  - "@sateeshperi"
|
|
@ -84,6 +84,11 @@ params {
|
||||||
// kraken2
|
// kraken2
|
||||||
run_kraken2 = false
|
run_kraken2 = false
|
||||||
|
|
||||||
|
// centrifuge
|
||||||
|
run_centrifuge = false
|
||||||
|
centrifuge_save_unaligned = false
|
||||||
|
centrifuge_save_aligned = false
|
||||||
|
centrifuge_sam_format = false
|
||||||
// metaphlan3
|
// metaphlan3
|
||||||
run_metaphlan3 = false
|
run_metaphlan3 = false
|
||||||
}
|
}
|
||||||
|
|
|
@ -281,6 +281,18 @@
|
||||||
"run_kraken2": {
|
"run_kraken2": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
|
"run_centrifuge": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"centrifuge_save_unaligned": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"centrifuge_save_aligned": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"centrifuge_sam_format": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"run_metaphlan3": {
|
"run_metaphlan3": {
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"description": "Enable MetaPhlAn for taxonomic profiling"
|
"description": "Enable MetaPhlAn for taxonomic profiling"
|
||||||
|
|
61
nf-core/modules/centrifuge/centrifuge/main.nf
Normal file
61
nf-core/modules/centrifuge/centrifuge/main.nf
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
// Runs `centrifuge` on one sample against one database.
//
// Inputs:
//   meta, reads     - sample metadata map and FastQ file(s); meta.single_end selects
//                     between `-U <reads>` and `-1 <reads[0]> -2 <reads[1]>`
//   db              - path searched (following symlinks) for the index's "*.1.cf" file
//   save_unaligned  - if true, unmapped reads are written to <prefix>.unmapped.fastq.gz
//   save_aligned    - if true, mapped reads are written to <prefix>.mapped.fastq.gz
//   sam_format      - if true, `--out-fmt 'sam'` is passed to centrifuge
//
// Outputs: report, results, optional sam / fastq_mapped / fastq_unmapped, and versions.yml.
process CENTRIFUGE_CENTRIFUGE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format

    output:
    tuple val(meta), path('*report.txt')                 , emit: report
    tuple val(meta), path('*results.txt')                , emit: results
    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
    def unaligned = ''
    def aligned = ''
    // Single-end and paired-end data use different centrifuge options for saving reads
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    ## Derive the index basename from its "*.1.cf" file.
    ## -not -name "._*" keeps Mac OSX metadata files from being picked up.
    ## The sed expression escapes the dots and anchors at end of line so that only the
    ## trailing ".1.cf" suffix is removed, never a look-alike earlier in the path.
    db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1\\.cf\$//'`
    centrifuge \\
        -x \$db_name \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
|
66
nf-core/modules/centrifuge/centrifuge/meta.yml
Normal file
66
nf-core/modules/centrifuge/centrifuge/meta.yml
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
# Module description for CENTRIFUGE_CENTRIFUGE.
# Inputs, outputs and glob patterns below mirror the declarations in the module's main.nf.
name: centrifuge_centrifuge
description: Classifies metagenomic sequence data
keywords:
  - classify
  - metagenomics
  - fastq
  - db
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - db:
      type: directory
      description: Path to directory containing centrifuge database files
  - save_unaligned:
      type: value
      description: If true unmapped fastq files are saved
  - save_aligned:
      type: value
      description: If true mapped fastq files are saved
  - sam_format:
      type: value
      description: If true the `--out-fmt 'sam'` option is passed to centrifuge

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: |
        File containing a classification summary
      pattern: "*report.txt"
  - results:
      type: file
      description: |
        File containing classification results
      pattern: "*results.txt"
  - sam:
      type: file
      description: Optional SAM output file
      pattern: "*.sam"
  - fastq_unmapped:
      type: file
      description: Unmapped fastq files
      pattern: "*.unmapped.fastq{,.1,.2}.gz"
  - fastq_mapped:
      type: file
      description: Mapped fastq files
      pattern: "*.mapped.fastq{,.1,.2}.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@sofstam"
  - "@jfy133"
  - "@sateeshperi"
|
|
@ -22,7 +22,7 @@ workflow DB_CHECK {
|
||||||
|
|
||||||
ch_dbs_for_untar = parsed_samplesheet
|
ch_dbs_for_untar = parsed_samplesheet
|
||||||
.branch {
|
.branch {
|
||||||
untar: it[1].toString().endsWith(".tar.gz") && it[0]['tool'] != 'centrifuge'
|
untar: it[1].toString().endsWith(".tar.gz")
|
||||||
skip: true
|
skip: true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -63,13 +63,12 @@ def create_fastq_channel(LinkedHashMap row) {
|
||||||
if (!file(row.fastq_2).exists()) {
|
if (!file(row.fastq_2).exists()) {
|
||||||
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
|
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
|
||||||
}
|
}
|
||||||
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
return fastq_meta
|
return fastq_meta
|
||||||
}
|
}// Function to get list of [ meta, fasta ]
|
||||||
|
|
||||||
// Function to get list of [ meta, fasta ]
|
|
||||||
def create_fasta_channel(LinkedHashMap row) {
|
def create_fasta_channel(LinkedHashMap row) {
|
||||||
def meta = [:]
|
def meta = [:]
|
||||||
meta.id = row.sample
|
meta.id = row.sample
|
||||||
|
|
|
@ -61,6 +61,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/
|
||||||
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
||||||
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
||||||
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
||||||
|
include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/centrifuge/main'
|
||||||
include { METAPHLAN3 } from '../modules/nf-core/modules/metaphlan3/main'
|
include { METAPHLAN3 } from '../modules/nf-core/modules/metaphlan3/main'
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -137,6 +138,7 @@ workflow TAXPROFILER {
|
||||||
malt: it[2]['tool'] == 'malt'
|
malt: it[2]['tool'] == 'malt'
|
||||||
kraken2: it[2]['tool'] == 'kraken2'
|
kraken2: it[2]['tool'] == 'kraken2'
|
||||||
metaphlan3: it[2]['tool'] == 'metaphlan3'
|
metaphlan3: it[2]['tool'] == 'metaphlan3'
|
||||||
|
centrifuge: it[2]['tool'] == 'centrifuge'
|
||||||
unknown: true
|
unknown: true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,6 +172,18 @@ workflow TAXPROFILER {
|
||||||
db: it[3]
|
db: it[3]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We can run centrifuge one-by-one sample-wise
|
||||||
|
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
|
||||||
|
.dump(tag: "input for centrifuge")
|
||||||
|
.multiMap {
|
||||||
|
it ->
|
||||||
|
reads: [ it[0] + it[2], it[1] ]
|
||||||
|
db: it[3]
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// RUN PROFILING
|
||||||
|
//
|
||||||
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
|
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
|
||||||
.multiMap {
|
.multiMap {
|
||||||
it ->
|
it ->
|
||||||
|
@ -188,6 +202,10 @@ workflow TAXPROFILER {
|
||||||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( params.run_centrifuge ) {
|
||||||
|
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
|
||||||
|
}
|
||||||
|
|
||||||
if ( params.run_metaphlan3 ) {
|
if ( params.run_metaphlan3 ) {
|
||||||
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
|
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue