mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 02:58:17 +00:00
update to kraken2: breaking change - output channels renamed (#1525)
* Updated the kraken2 module to optionally report the classification of each input read, and made the FASTQ outputs optional. NB: this is a breaking change, because the output channels have been renamed as a consequence of these changes.
* Updated the yml.
* Made the pigz command optional, so that it is executed only if the FASTQ files of classified/unclassified reads are saved.
* Updated the test yaml file for kraken2.
* Fixed TODOs and renamed variables and outputs.
* untar under conda cannot keep the same md5sum for its versions file, so that md5sum check was removed.
* Improved the description of the options.

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
parent
d07d270743
commit
abe025677c
4 changed files with 75 additions and 27 deletions
|
@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
|
||||||
input:
|
input:
|
||||||
tuple val(meta), path(reads)
|
tuple val(meta), path(reads)
|
||||||
path db
|
path db
|
||||||
|
val save_output_fastqs
|
||||||
|
val save_reads_assignment
|
||||||
|
|
||||||
output:
|
output:
|
||||||
tuple val(meta), path('*classified*') , emit: classified
|
tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq
|
||||||
tuple val(meta), path('*unclassified*'), emit: unclassified
|
tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq
|
||||||
tuple val(meta), path('*report.txt') , emit: txt
|
tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment
|
||||||
path "versions.yml" , emit: versions
|
tuple val(meta), path('*report.txt') , emit: report
|
||||||
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
when:
|
when:
|
||||||
task.ext.when == null || task.ext.when
|
task.ext.when == null || task.ext.when
|
||||||
|
@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
|
||||||
def paired = meta.single_end ? "" : "--paired"
|
def paired = meta.single_end ? "" : "--paired"
|
||||||
def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq"
|
def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq"
|
||||||
def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
|
def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
|
||||||
|
def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
|
||||||
|
def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
|
||||||
|
def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
|
||||||
|
def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
|
||||||
|
|
||||||
"""
|
"""
|
||||||
kraken2 \\
|
kraken2 \\
|
||||||
--db $db \\
|
--db $db \\
|
||||||
--threads $task.cpus \\
|
--threads $task.cpus \\
|
||||||
--unclassified-out $unclassified \\
|
|
||||||
--classified-out $classified \\
|
|
||||||
--report ${prefix}.kraken2.report.txt \\
|
--report ${prefix}.kraken2.report.txt \\
|
||||||
--gzip-compressed \\
|
--gzip-compressed \\
|
||||||
|
$unclassified_command \\
|
||||||
|
$classified_command \\
|
||||||
|
$readclassification_command \\
|
||||||
$paired \\
|
$paired \\
|
||||||
$args \\
|
$args \\
|
||||||
$reads
|
$reads
|
||||||
|
|
||||||
pigz -p $task.cpus *.fastq
|
$compress_reads_command
|
||||||
|
|
||||||
cat <<-END_VERSIONS > versions.yml
|
cat <<-END_VERSIONS > versions.yml
|
||||||
"${task.process}":
|
"${task.process}":
|
||||||
|
|
|
@ -27,25 +27,40 @@ input:
|
||||||
- db:
|
- db:
|
||||||
type: directory
|
type: directory
|
||||||
description: Kraken2 database
|
description: Kraken2 database
|
||||||
|
- save_output_fastqs:
|
||||||
|
type: boolean
|
||||||
|
description: |
|
||||||
|
If true, optional commands are added to save classified and unclassified reads
|
||||||
|
as fastq files
|
||||||
|
- save_reads_assignment:
|
||||||
|
type: boolean
|
||||||
|
description: |
|
||||||
|
If true, an optional command is added to save a file reporting the taxonomic
|
||||||
|
classification of each input read
|
||||||
output:
|
output:
|
||||||
- meta:
|
- meta:
|
||||||
type: map
|
type: map
|
||||||
description: |
|
description: |
|
||||||
Groovy Map containing sample information
|
Groovy Map containing sample information
|
||||||
e.g. [ id:'test', single_end:false ]
|
e.g. [ id:'test', single_end:false ]
|
||||||
- classified:
|
- classified_reads_fastq:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Reads classified to belong to any of the taxa
|
Reads classified as belonging to any of the taxa
|
||||||
on the Kraken2 database.
|
on the Kraken2 database.
|
||||||
pattern: "*{fastq.gz}"
|
pattern: "*{fastq.gz}"
|
||||||
- unclassified:
|
- unclassified_reads_fastq:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Reads not classified to belong to any of the taxa
|
Reads not classified to any of the taxa
|
||||||
on the Kraken2 database.
|
on the Kraken2 database.
|
||||||
pattern: "*{fastq.gz}"
|
pattern: "*{fastq.gz}"
|
||||||
- txt:
|
- classified_reads_assignment:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Kraken2 output file indicating the taxonomic assignment of
|
||||||
|
each input read
|
||||||
|
- report:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Kraken2 report containing stats about classified
|
Kraken2 report containing stats about classified
|
||||||
|
|
|
@ -12,7 +12,7 @@ workflow test_kraken2_kraken2_single_end {
|
||||||
db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
|
db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
|
||||||
|
|
||||||
UNTAR ( db )
|
UNTAR ( db )
|
||||||
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } )
|
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, true, false )
|
||||||
}
|
}
|
||||||
|
|
||||||
workflow test_kraken2_kraken2_paired_end {
|
workflow test_kraken2_kraken2_paired_end {
|
||||||
|
@ -23,5 +23,15 @@ workflow test_kraken2_kraken2_paired_end {
|
||||||
db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
|
db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
|
||||||
|
|
||||||
UNTAR ( db )
|
UNTAR ( db )
|
||||||
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } )
|
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, true, false )
|
||||||
|
}
|
||||||
|
|
||||||
|
workflow test_kraken2_kraken2_classifyreads {
|
||||||
|
input = [ [ id:'test', single_end:true ], // meta map
|
||||||
|
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
|
||||||
|
]
|
||||||
|
db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
|
||||||
|
|
||||||
|
UNTAR ( db )
|
||||||
|
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, false, true )
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,29 +1,43 @@
|
||||||
- name: kraken2 kraken2 single-end
|
- name: kraken2 kraken2 test_kraken2_kraken2_single_end
|
||||||
command: nextflow run ./tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_single_end -c ./tests/config/nextflow.config -c ./tests/modules/kraken2/kraken2/nextflow.config
|
command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_single_end -c tests/config/nextflow.config
|
||||||
tags:
|
tags:
|
||||||
- kraken2
|
- kraken2
|
||||||
- kraken2/kraken2
|
- kraken2/kraken2
|
||||||
files:
|
files:
|
||||||
- path: output/kraken2/test.classified.fastq.gz
|
- path: output/kraken2/test.classified.fastq.gz
|
||||||
should_exist: true
|
|
||||||
- path: output/kraken2/test.unclassified.fastq.gz
|
|
||||||
should_exist: true
|
|
||||||
- path: output/kraken2/test.kraken2.report.txt
|
- path: output/kraken2/test.kraken2.report.txt
|
||||||
md5sum: 4227755fe40478b8d7dc8634b489761e
|
md5sum: 4227755fe40478b8d7dc8634b489761e
|
||||||
|
- path: output/kraken2/test.unclassified.fastq.gz
|
||||||
|
- path: output/kraken2/versions.yml
|
||||||
|
md5sum: 6e3ad947ac8dee841a89216071c181cc
|
||||||
|
- path: output/untar/versions.yml
|
||||||
|
|
||||||
- name: kraken2 kraken2 paired-end
|
- name: kraken2 kraken2 test_kraken2_kraken2_paired_end
|
||||||
command: nextflow run ./tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/kraken2/kraken2/nextflow.config
|
command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_paired_end -c tests/config/nextflow.config
|
||||||
tags:
|
tags:
|
||||||
- kraken2
|
- kraken2
|
||||||
- kraken2/kraken2
|
- kraken2/kraken2
|
||||||
files:
|
files:
|
||||||
- path: output/kraken2/test.classified_1.fastq.gz
|
- path: output/kraken2/test.classified_1.fastq.gz
|
||||||
should_exist: true
|
|
||||||
- path: output/kraken2/test.classified_2.fastq.gz
|
- path: output/kraken2/test.classified_2.fastq.gz
|
||||||
should_exist: true
|
|
||||||
- path: output/kraken2/test.unclassified_1.fastq.gz
|
|
||||||
should_exist: true
|
|
||||||
- path: output/kraken2/test.unclassified_2.fastq.gz
|
|
||||||
should_exist: true
|
|
||||||
- path: output/kraken2/test.kraken2.report.txt
|
- path: output/kraken2/test.kraken2.report.txt
|
||||||
md5sum: 4227755fe40478b8d7dc8634b489761e
|
md5sum: 4227755fe40478b8d7dc8634b489761e
|
||||||
|
- path: output/kraken2/test.unclassified_1.fastq.gz
|
||||||
|
- path: output/kraken2/test.unclassified_2.fastq.gz
|
||||||
|
- path: output/kraken2/versions.yml
|
||||||
|
md5sum: 604482fe7a4519f890fae9c8beb1bd6e
|
||||||
|
- path: output/untar/versions.yml
|
||||||
|
|
||||||
|
- name: kraken2 kraken2 test_kraken2_kraken2_classifyreads
|
||||||
|
command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_classifyreads -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- kraken2
|
||||||
|
- kraken2/kraken2
|
||||||
|
files:
|
||||||
|
- path: output/kraken2/test.kraken2.classifiedreads.txt
|
||||||
|
md5sum: e7a90531f0d8d777316515c36fe4cae0
|
||||||
|
- path: output/kraken2/test.kraken2.report.txt
|
||||||
|
md5sum: 4227755fe40478b8d7dc8634b489761e
|
||||||
|
- path: output/kraken2/versions.yml
|
||||||
|
md5sum: 3488c304259e83c5bea573403293fce9
|
||||||
|
- path: output/untar/versions.yml
|
||||||
|
|
Loading…
Reference in a new issue