update to kraken2: breaking change - output channels renamed (#1525)

* updated kraken2 module to include optional per-read classification output, and made the fastq outputs optional
NB: this is a breaking change, because the output channels have been renamed (classified -> classified_reads_fastq, unclassified -> unclassified_reads_fastq, txt -> report)

* updated yml

* pigz command made optional, so that it is executed only if the fastq files of classified/unclassified reads are saved

* updated test yaml file for kraken2

* fixed TODOs and renamed variables and outputs

* untar under conda does not produce a stable md5sum for its versions.yml, so the md5sum check on that file was removed

* improved description of the options

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Francesco L 2022-04-21 14:33:59 +02:00 committed by GitHub
parent d07d270743
commit abe025677c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 75 additions and 27 deletions

View file

@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
input: input:
tuple val(meta), path(reads) tuple val(meta), path(reads)
path db path db
val save_output_fastqs
val save_reads_assignment
output: output:
tuple val(meta), path('*classified*') , emit: classified tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq
tuple val(meta), path('*unclassified*'), emit: unclassified tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq
tuple val(meta), path('*report.txt') , emit: txt tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment
path "versions.yml" , emit: versions tuple val(meta), path('*report.txt') , emit: report
path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
def paired = meta.single_end ? "" : "--paired" def paired = meta.single_end ? "" : "--paired"
def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq"
def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
""" """
kraken2 \\ kraken2 \\
--db $db \\ --db $db \\
--threads $task.cpus \\ --threads $task.cpus \\
--unclassified-out $unclassified \\
--classified-out $classified \\
--report ${prefix}.kraken2.report.txt \\ --report ${prefix}.kraken2.report.txt \\
--gzip-compressed \\ --gzip-compressed \\
$unclassified_command \\
$classified_command \\
$readclassification_command \\
$paired \\ $paired \\
$args \\ $args \\
$reads $reads
pigz -p $task.cpus *.fastq $compress_reads_command
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":

View file

@ -27,25 +27,40 @@ input:
- db: - db:
type: directory type: directory
description: Kraken2 database description: Kraken2 database
- save_output_fastqs:
type: boolean
description: |
If true, optional commands are added to save classified and unclassified reads
as fastq files
- save_reads_assignment:
type: boolean
description: |
If true, an optional command is added to save a file reporting the taxonomic
classification of each input read
output: output:
- meta: - meta:
type: map type: map
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- classified: - classified_reads_fastq:
type: file type: file
description: | description: |
Reads classified to belong to any of the taxa Reads classified as belonging to any of the taxa
on the Kraken2 database. on the Kraken2 database.
pattern: "*{fastq.gz}" pattern: "*{fastq.gz}"
- unclassified: - unclassified_reads_fastq:
type: file type: file
description: | description: |
Reads not classified to belong to any of the taxa Reads not classified to any of the taxa
on the Kraken2 database. on the Kraken2 database.
pattern: "*{fastq.gz}" pattern: "*{fastq.gz}"
- txt: - classified_reads_assignment:
type: file
description: |
Kraken2 output file indicating the taxonomic assignment of
each input read
- report:
type: file type: file
description: | description: |
Kraken2 report containing stats about classified Kraken2 report containing stats about classified

View file

@ -12,7 +12,7 @@ workflow test_kraken2_kraken2_single_end {
db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ] db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } ) KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, true, false )
} }
workflow test_kraken2_kraken2_paired_end { workflow test_kraken2_kraken2_paired_end {
@ -23,5 +23,15 @@ workflow test_kraken2_kraken2_paired_end {
db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ] db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
UNTAR ( db ) UNTAR ( db )
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } ) KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, true, false )
}
workflow test_kraken2_kraken2_classifyreads {
input = [ [ id:'test', single_end:true ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
]
db = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
UNTAR ( db )
KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, false, true )
} }

View file

@ -1,29 +1,43 @@
- name: kraken2 kraken2 single-end - name: kraken2 kraken2 test_kraken2_kraken2_single_end
command: nextflow run ./tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_single_end -c ./tests/config/nextflow.config -c ./tests/modules/kraken2/kraken2/nextflow.config command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_single_end -c tests/config/nextflow.config
tags: tags:
- kraken2 - kraken2
- kraken2/kraken2 - kraken2/kraken2
files: files:
- path: output/kraken2/test.classified.fastq.gz - path: output/kraken2/test.classified.fastq.gz
should_exist: true
- path: output/kraken2/test.unclassified.fastq.gz
should_exist: true
- path: output/kraken2/test.kraken2.report.txt - path: output/kraken2/test.kraken2.report.txt
md5sum: 4227755fe40478b8d7dc8634b489761e md5sum: 4227755fe40478b8d7dc8634b489761e
- path: output/kraken2/test.unclassified.fastq.gz
- path: output/kraken2/versions.yml
md5sum: 6e3ad947ac8dee841a89216071c181cc
- path: output/untar/versions.yml
- name: kraken2 kraken2 paired-end - name: kraken2 kraken2 test_kraken2_kraken2_paired_end
command: nextflow run ./tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/kraken2/kraken2/nextflow.config command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_paired_end -c tests/config/nextflow.config
tags: tags:
- kraken2 - kraken2
- kraken2/kraken2 - kraken2/kraken2
files: files:
- path: output/kraken2/test.classified_1.fastq.gz - path: output/kraken2/test.classified_1.fastq.gz
should_exist: true
- path: output/kraken2/test.classified_2.fastq.gz - path: output/kraken2/test.classified_2.fastq.gz
should_exist: true
- path: output/kraken2/test.unclassified_1.fastq.gz
should_exist: true
- path: output/kraken2/test.unclassified_2.fastq.gz
should_exist: true
- path: output/kraken2/test.kraken2.report.txt - path: output/kraken2/test.kraken2.report.txt
md5sum: 4227755fe40478b8d7dc8634b489761e md5sum: 4227755fe40478b8d7dc8634b489761e
- path: output/kraken2/test.unclassified_1.fastq.gz
- path: output/kraken2/test.unclassified_2.fastq.gz
- path: output/kraken2/versions.yml
md5sum: 604482fe7a4519f890fae9c8beb1bd6e
- path: output/untar/versions.yml
- name: kraken2 kraken2 test_kraken2_kraken2_classifyreads
command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_classifyreads -c tests/config/nextflow.config
tags:
- kraken2
- kraken2/kraken2
files:
- path: output/kraken2/test.kraken2.classifiedreads.txt
md5sum: e7a90531f0d8d777316515c36fe4cae0
- path: output/kraken2/test.kraken2.report.txt
md5sum: 4227755fe40478b8d7dc8634b489761e
- path: output/kraken2/versions.yml
md5sum: 3488c304259e83c5bea573403293fce9
- path: output/untar/versions.yml