
Add working output for Kraken2/Centrifuge/DIAMOND

James Fellows Yates 2022-05-07 05:22:35 +02:00
parent e7b54801ed
commit 87a1d80519
11 changed files with 181 additions and 65 deletions

conf/modules.config

@@ -271,7 +271,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/kraken2/${meta.db_name}" },
             mode: params.publish_dir_mode,
-            pattern: '*.{txt}'
+            pattern: '*.{txt,report,fastq.gz}'
         ]
     }
@@ -289,7 +289,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/centrifuge/${meta.db_name}" },
             mode: params.publish_dir_mode,
-            pattern: '*.txt'
+            pattern: '*.{txt,sam,gz}'
         ]
         ext.args = { "${meta.db_params}" }
         ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
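
These `pattern` values are Java-style globs evaluated against each output file name, so widening the alternation is what publishes the new read FASTQs alongside the reports. A minimal Groovy sketch of the matching behaviour (file names are illustrative, not from this commit):

```groovy
import java.nio.file.FileSystems
import java.nio.file.Paths

// how a publishDir pattern like '*.{txt,report,fastq.gz}' decides what to copy
def matcher = FileSystems.getDefault().getPathMatcher('glob:*.{txt,report,fastq.gz}')

['sample.kraken2.report.txt',    // published via *.txt
 'sample.classified_1.fastq.gz', // published via *.fastq.gz
 'versions.yml'                  // not matched, so not published
].each { name ->
    println "${name} -> ${matcher.matches(Paths.get(name))}"
}
```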

conf/test.config

@@ -36,6 +36,10 @@ params {
     run_metaphlan3        = true
     run_centrifuge        = true
     run_diamond           = true
+    malt_save_reads       = true
+    kraken2_save_reads    = true
+    centrifuge_save_reads = true
+    diamond_save_reads    = true
 }

 process {
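
The test profile now switches on every new save-reads flag at once, so CI exercises all of the extra published outputs. A hedged sketch of enabling the same behaviour in a user-supplied config (flag names are from this commit; the config file name is illustrative):

```groovy
// save-reads.config — pass to the pipeline with `-c save-reads.config`
params {
    malt_save_reads       = true
    kraken2_save_reads    = true
    centrifuge_save_reads = true
    diamond_save_reads    = true // NB: replaces the DIAMOND taxonomic profile with SAM output
}
```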

modules.json

@@ -28,7 +28,7 @@
             "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
         },
         "diamond/blastx": {
-            "git_sha": "42564565b934eeb2449e35ec97ed13ff2a67f1de"
+            "git_sha": "bd3bfe0817246082525ab93707976676b1fe208b"
         },
         "fastp": {
             "git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789"
@@ -43,7 +43,7 @@
             "git_sha": "538dbac98ba9c8f799536cd5a617195501439457"
         },
         "kraken2/kraken2": {
-            "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+            "git_sha": "abe025677cdd805cc93032341ab19885473c1a07"
         },
         "malt/run": {
             "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"

modules/nf-core/modules/diamond/blastx/main.nf

@@ -2,21 +2,26 @@ process DIAMOND_BLASTX {
     tag "$meta.id"
     label 'process_medium'

-    // Dimaond is limited to v2.0.9 because there is not a conda
-    // singularity version higher than this at the current time.
-    conda (params.enable_conda ? "bioconda::diamond=2.0.9" : null)
+    conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' :
-        'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }"
+        'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
+        'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"

     input:
     tuple val(meta), path(fasta)
     path  db
-    val   outext
+    val   out_ext
+    val   blast_columns

     output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml"                                       , emit: versions
+    tuple val(meta), path('*.blast'), optional: true, emit: blast
+    tuple val(meta), path('*.xml')  , optional: true, emit: xml
+    tuple val(meta), path('*.txt')  , optional: true, emit: txt
+    tuple val(meta), path('*.daa')  , optional: true, emit: daa
+    tuple val(meta), path('*.sam')  , optional: true, emit: sam
+    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
+    tuple val(meta), path('*.paf')  , optional: true, emit: paf
+    path "versions.yml"             , emit: versions

     when:
     task.ext.when == null || task.ext.when

@@ -24,7 +29,8 @@ process DIAMOND_BLASTX {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    switch ( outext ) {
+    def columns = blast_columns ? "${blast_columns}" : ''
+    switch ( out_ext ) {
         case "blast": outfmt = 0; break
         case "xml": outfmt = 5; break
         case "txt": outfmt = 6; break
@@ -32,6 +38,11 @@ process DIAMOND_BLASTX {
         case "sam": outfmt = 101; break
         case "tsv": outfmt = 102; break
         case "paf": outfmt = 103; break
+        default:
+            outfmt = '6';
+            out_ext = 'txt';
+            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
+            break
     }
     """
     DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@@ -41,9 +52,9 @@ process DIAMOND_BLASTX {
         --threads $task.cpus \\
         --db \$DB \\
         --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
         $args \\
-        --out ${prefix}.${outext}
+        --out ${prefix}.${out_ext}

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

modules/nf-core/modules/diamond/blastx/meta.yml

@@ -28,7 +28,7 @@ input:
       type: directory
       description: Directory containing the nucleotide blast database
       pattern: "*"
-  - outext:
+  - out_ext:
       type: string
       description: |
         Specify the type of output file to be generated. `blast` corresponds to
@@ -38,10 +38,34 @@ input:
       pattern: "blast|xml|txt|daa|sam|tsv|paf"

 output:
+  - blast:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{xml}"
   - txt:
       type: file
-      description: File containing blastx hits
-      pattern: "*.{blastx.txt}"
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab-separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format (PAF)
+      pattern: "*.{paf}"
   - versions:
       type: file
       description: File containing software versions

modules/nf-core/modules/kraken2/kraken2/main.nf

@@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
     input:
     tuple val(meta), path(reads)
     path db
+    val save_output_fastqs
+    val save_reads_assignment

     output:
-    tuple val(meta), path('*classified*')  , emit: classified
-    tuple val(meta), path('*unclassified*'), emit: unclassified
-    tuple val(meta), path('*report.txt')   , emit: txt
-    path "versions.yml"                    , emit: versions
+    tuple val(meta), path('*classified*')     , optional:true, emit: classified_reads_fastq
+    tuple val(meta), path('*unclassified*')   , optional:true, emit: unclassified_reads_fastq
+    tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment
+    tuple val(meta), path('*report.txt')      , emit: report
+    path "versions.yml"                       , emit: versions

     when:
     task.ext.when == null || task.ext.when

@@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
     def paired       = meta.single_end ? "" : "--paired"
     def classified   = meta.single_end ? "${prefix}.classified.fastq"   : "${prefix}.classified#.fastq"
     def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
+    def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
+    def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
+    def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
+    def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""

     """
     kraken2 \\
         --db $db \\
         --threads $task.cpus \\
-        --unclassified-out $unclassified \\
-        --classified-out $classified \\
         --report ${prefix}.kraken2.report.txt \\
         --gzip-compressed \\
+        $unclassified_command \\
+        $classified_command \\
+        $readclassification_command \\
        $paired \\
        $args \\
        $reads

-    pigz -p $task.cpus *.fastq
+    $compress_reads_command

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

modules/nf-core/modules/kraken2/kraken2/meta.yml

@@ -27,25 +27,40 @@ input:
   - db:
       type: directory
       description: Kraken2 database
+  - save_output_fastqs:
+      type: boolean
+      description: |
+        If true, optional commands are added to save classified and unclassified reads
+        as fastq files
+  - save_reads_assignment:
+      type: boolean
+      description: |
+        If true, an optional command is added to save a file reporting the taxonomic
+        classification of each input read
 output:
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - classified:
+  - classified_reads_fastq:
       type: file
       description: |
-        Reads classified to belong to any of the taxa
+        Reads classified as belonging to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - unclassified:
+  - unclassified_reads_fastq:
       type: file
       description: |
-        Reads not classified to belong to any of the taxa
+        Reads not classified to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - txt:
+  - classified_reads_assignment:
+      type: file
+      description: |
+        Kraken2 output file indicating the taxonomic assignment of
+        each input read
+  - report:
       type: file
       description: |
         Kraken2 report containing stats about classified

nextflow.config

@@ -94,16 +94,17 @@ params {
     // MALT
     run_malt                        = false
     malt_mode                       = 'BlastN'
-    malt_generatemegansummary       = false
+    malt_generate_megansummary      = false
+    malt_save_reads                 = false

     // kraken2
     run_kraken2                     = false
+    kraken2_save_reads              = false
+    kraken2_save_readclassification = false

     // centrifuge
     run_centrifuge                  = false
-    centrifuge_save_unaligned       = false
-    centrifuge_save_aligned         = false
-    centrifuge_sam_format           = false
+    centrifuge_save_reads           = false

     // metaphlan3
     run_metaphlan3                  = false
@@ -114,7 +115,8 @@ params {

     // diamond
     run_diamond                     = false
-    diamond_output_format           = 'txt'
+    diamond_output_format           = 'tsv'  // TSV is the only format with taxonomic information, apparently
+    diamond_save_reads              = false  // this will override the diamond output format, so no taxonomic profile is generated!
 }

 // Load base.config by default for all pipelines
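
The two inline comments describe an interaction rather than two independent flags: because DIAMOND writes a single output file, saving reads displaces the profile. A one-line sketch of the precedence rule as wired in the profiling subworkflow further down:

```groovy
// saving reads wins: it forces SAM output, so no taxonomic profile is written
def effective_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format
```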

nextflow_schema.json

@@ -10,7 +10,10 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": ["input", "outdir"],
+            "required": [
+                "input",
+                "outdir"
+            ],
             "properties": {
                 "input": {
                     "type": "string",
@@ -173,7 +176,14 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                    "enum": [
+                        "symlink",
+                        "rellink",
+                        "link",
+                        "copy",
+                        "copyNoFollow",
+                        "move"
+                    ],
                     "hidden": true
                 },
                 "email_on_fail": {
@@ -278,15 +288,6 @@
         "run_centrifuge": {
             "type": "boolean"
         },
-        "centrifuge_save_unaligned": {
-            "type": "boolean"
-        },
-        "centrifuge_save_aligned": {
-            "type": "boolean"
-        },
-        "centrifuge_sam_format": {
-            "type": "boolean"
-        },
         "run_metaphlan3": {
             "type": "boolean",
             "description": "Enable MetaPhlAn for taxonomic profiling"
@@ -294,7 +295,10 @@
         "shortread_clipmerge_tool": {
             "type": "string",
             "default": "fastp",
-            "enum": ["fastp", "adapterremoval"]
+            "enum": [
+                "fastp",
+                "adapterremoval"
+            ]
         },
         "shortread_clipmerge_skipadaptertrim": {
             "type": "boolean"
@@ -335,7 +339,10 @@
         "shortread_complexityfilter_prinseqplusplus_mode": {
             "type": "string",
             "default": "entropy",
-            "enum": ["entropy", "dust"]
+            "enum": [
+                "entropy",
+                "dust"
+            ]
         },
         "shortread_complexityfilter_prinseqplusplus_dustscore": {
             "type": "number",
@@ -385,13 +392,20 @@
         "run_kaiju": {
             "type": "boolean"
        },
-        "malt_generatemegansummary": {
+        "malt_generate_megansummary": {
             "type": "boolean"
         },
         "kaiju_taxon_name": {
             "type": "string",
             "default": "species",
-            "enum": ["phylum", "class", "order", "family", "genus", "species"]
+            "enum": [
+                "phylum",
+                "class",
+                "order",
+                "family",
+                "genus",
+                "species"
+            ]
         },
         "run_diamond": {
             "type": "boolean"
@@ -399,11 +413,34 @@
         "diamond_output_format": {
             "type": "string",
             "default": "tsv",
-            "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"]
+            "enum": [
+                "blast",
+                "xml",
+                "txt",
+                "daa",
+                "sam",
+                "tsv",
+                "paf"
+            ]
         },
         "longread_hostremoval_index": {
             "type": "string",
             "default": "None"
+        },
+        "malt_save_reads": {
+            "type": "boolean"
+        },
+        "kraken2_save_reads": {
+            "type": "boolean"
+        },
+        "kraken2_save_readclassification": {
+            "type": "boolean"
+        },
+        "centrifuge_save_reads": {
+            "type": "boolean"
+        },
+        "diamond_save_reads": {
+            "type": "boolean"
         }
     }
 }

subworkflows/local/profiling.nf

@@ -65,10 +65,18 @@ workflow PROFILING {
         ch_input_for_malt = ch_input_for_profiling.malt
                                 .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
                                 .map {
-                                    it ->
-                                        def temp_meta = [ id: it[2]['db_name']] + it[2]
-                                        def db = it[3]
-                                        [ temp_meta, it[1], db ]
+                                    meta, reads, db_meta, db ->
+                                        def sam_format = params.malt_save_reads ? ' --alignments' : ""
+                                        // TODO No MALT SAM?
+                                        // TODO check all aligned reads published
+                                        // TODO try turning on/off aligned reads
+                                        // TODO wut? [9a/a441d6] Submitted process > NFCORE_TAXPROFILER:TAXPROFILER:PROFILING:MALT_RUN (null)
+                                        def temp_meta = [ id: meta['db_name'] ]
+                                        def new_db_meta = db_meta.clone()
+                                        new_db_meta['db_params'] = db_meta['db_params'] + sam_format
+                                        def new_meta = temp_meta + new_db_meta
+                                        [ new_meta, reads, db ]
                                 }
                                 .groupTuple(by: [0,2])
                                 .multiMap {
@@ -92,7 +100,7 @@ workflow PROFILING {
                                         [ meta_new, rma ]
                                     }

-        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
+        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary )
         ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
         ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
@@ -108,10 +116,10 @@ workflow PROFILING {
                                 db: it[3]
                             }

-        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
-        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
+        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification )
+        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
-        ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
+        ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report )

     }

@@ -128,7 +136,7 @@ workflow PROFILING {
                                 db: it[3]
                             }

-        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
+        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads )
         CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db)
         ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
         ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
@@ -180,9 +188,13 @@ workflow PROFILING {
                                 db: it[3]
                             }

-        DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format )
+        // diamond only accepts single output file specification, therefore
+        // this will replace output file!
+        ch_diamond_reads_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format
+        DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format, [] )
         ch_versions = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() )
-        ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.output )
+        ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.tsv )

     }
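
A detail that is easy to miss in the Centrifuge hunk above: the module still takes three separate booleans, and the single `centrifuge_save_reads` flag is simply fanned out to all of them. An annotated restatement of the call, with parameter roles inferred from the schema entries this commit removes (`centrifuge_save_unaligned`, `centrifuge_save_aligned`, `centrifuge_sam_format`):

```groovy
// one user-facing flag drives all three module-level switches
CENTRIFUGE_CENTRIFUGE (
    ch_input_for_centrifuge.reads,
    ch_input_for_centrifuge.db,
    params.centrifuge_save_reads,   // save_unaligned
    params.centrifuge_save_reads,   // save_aligned
    params.centrifuge_save_reads    // sam_format
)
```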

workflows/taxprofiler.nf

@@ -29,6 +29,8 @@ if (params.hostremoval_reference ) { ch_reference = file(params.hostre
 if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] }
 if (params.longread_hostremoval_index ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] }

+if (params.diamond_save_reads ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. Only aligned reads in SAM format will be produced, no taxonomic profiles will be available."
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     CONFIG FILES