From abe025677cdd805cc93032341ab19885473c1a07 Mon Sep 17 00:00:00 2001
From: Francesco L <53608000+lescai@users.noreply.github.com>
Date: Thu, 21 Apr 2022 14:33:59 +0200
Subject: [PATCH] update to kraken2: breaking change - output channels renamed
 (#1525)

* updated kraken2 module to include optional classification of each input reads, and make fastq outputs optional

NB: this is a breaking change, because the output channels have been renamed as a consequence of changes

* updated yml

* pigz command made optional, in order to be executed only if fastq of classified/unclassified reads are saved

* updated test yaml file for kraken2

* fixed TODOs and renamed variables and outputs

* untar in conda cannot keep same md5sum of version, and therefore md5sum check removed

* improved description of the options

Co-authored-by: James A. Fellows Yates
---
Review notes (free text between the "---" separator and the first
"diff --git" line is not part of the commit message or of the diff):

  * modules/kraken2/kraken2/main.nf: the output globs were tightened.
    The script in this patch writes <prefix>.classified[#].fastq,
    <prefix>.unclassified[#].fastq and <prefix>.kraken2.classifiedreads.txt.
    The previous pattern '*classified*' also matched the "unclassified"
    FASTQ files and the "classifiedreads" table, so the
    classified_reads_fastq channel could emit files belonging to the other
    two channels. The patterns are now '*.classified{.,_}*',
    '*.unclassified{.,_}*' and '*classifiedreads.txt'. Emit names are
    unchanged.
  * Only existing "+" lines were edited, so hunk line counts and the
    diffstat below still hold. The post-image blob id on the main.nf
    "index" line predates this edit.
  * NOTE(review): intra-line alignment whitespace was reconstructed;
    confirm context lines against the target tree, or apply with
    `git apply --ignore-whitespace`.

 modules/kraken2/kraken2/main.nf        | 23 ++++++++++-----
 modules/kraken2/kraken2/meta.yml       | 25 ++++++++++++----
 tests/modules/kraken2/kraken2/main.nf  | 14 +++++++--
 tests/modules/kraken2/kraken2/test.yml | 40 +++++++++++++++++---------
 4 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/modules/kraken2/kraken2/main.nf b/modules/kraken2/kraken2/main.nf
index 3ec5df52..d4000233 100644
--- a/modules/kraken2/kraken2/main.nf
+++ b/modules/kraken2/kraken2/main.nf
@@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
     input:
     tuple val(meta), path(reads)
     path  db
+    val save_output_fastqs
+    val save_reads_assignment
 
     output:
-    tuple val(meta), path('*classified*')  , emit: classified
-    tuple val(meta), path('*unclassified*'), emit: unclassified
-    tuple val(meta), path('*report.txt')   , emit: txt
-    path "versions.yml"                    , emit: versions
+    tuple val(meta), path('*.classified{.,_}*')  , optional:true, emit: classified_reads_fastq
+    tuple val(meta), path('*.unclassified{.,_}*'), optional:true, emit: unclassified_reads_fastq
+    tuple val(meta), path('*classifiedreads.txt'), optional:true, emit: classified_reads_assignment
+    tuple val(meta), path('*report.txt')         ,                emit: report
+    path "versions.yml"                          ,                emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
     def paired       = meta.single_end ? "" : "--paired"
     def classified   = meta.single_end ? "${prefix}.classified.fastq"   : "${prefix}.classified#.fastq"
     def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
+    def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
+    def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
+    def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
+    def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
+
     """
     kraken2 \\
         --db $db \\
         --threads $task.cpus \\
-        --unclassified-out $unclassified \\
-        --classified-out $classified \\
         --report ${prefix}.kraken2.report.txt \\
         --gzip-compressed \\
+        $unclassified_command \\
+        $classified_command \\
+        $readclassification_command \\
         $paired \\
         $args \\
         $reads
 
-    pigz -p $task.cpus *.fastq
+    $compress_reads_command
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/kraken2/kraken2/meta.yml b/modules/kraken2/kraken2/meta.yml
index 9d6a3855..7129fe3a 100644
--- a/modules/kraken2/kraken2/meta.yml
+++ b/modules/kraken2/kraken2/meta.yml
@@ -27,25 +27,40 @@ input:
   - db:
       type: directory
       description: Kraken2 database
+  - save_output_fastqs:
+      type: boolean
+      description: |
+        If true, optional commands are added to save classified and unclassified reads
+        as fastq files
+  - save_reads_assignment:
+      type: boolean
+      description: |
+        If true, an optional command is added to save a file reporting the taxonomic
+        classification of each input read
 output:
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - classified:
+  - classified_reads_fastq:
       type: file
       description: |
-        Reads classified to belong to any of the taxa
+        Reads classified as belonging to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - unclassified:
+  - unclassified_reads_fastq:
       type: file
       description: |
-        Reads not classified to belong to any of the taxa
+        Reads not classified to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - txt:
+  - classified_reads_assignment:
+      type: file
+      description: |
+        Kraken2 output file indicating the taxonomic assignment of
+        each input read
+  - report:
       type: file
       description: |
         Kraken2 report containing stats about classified
diff --git a/tests/modules/kraken2/kraken2/main.nf b/tests/modules/kraken2/kraken2/main.nf
index 94f4db95..4a3593e4 100644
--- a/tests/modules/kraken2/kraken2/main.nf
+++ b/tests/modules/kraken2/kraken2/main.nf
@@ -12,7 +12,7 @@ workflow test_kraken2_kraken2_single_end {
     db    = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
 
     UNTAR ( db )
-    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } )
+    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, true, false )
 }
 
 workflow test_kraken2_kraken2_paired_end {
@@ -23,5 +23,15 @@ workflow test_kraken2_kraken2_paired_end {
     db    = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
 
     UNTAR ( db )
-    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } )
+    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, true, false )
+}
+
+workflow test_kraken2_kraken2_classifyreads {
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+    db    = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
+
+    UNTAR ( db )
+    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, false, true )
 }
diff --git a/tests/modules/kraken2/kraken2/test.yml b/tests/modules/kraken2/kraken2/test.yml
index 1ec413bf..af1e6e0d 100644
--- a/tests/modules/kraken2/kraken2/test.yml
+++ b/tests/modules/kraken2/kraken2/test.yml
@@ -1,29 +1,43 @@
-- name: kraken2 kraken2 single-end
-  command: nextflow run ./tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_single_end -c ./tests/config/nextflow.config -c ./tests/modules/kraken2/kraken2/nextflow.config
+- name: kraken2 kraken2 test_kraken2_kraken2_single_end
+  command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_single_end -c tests/config/nextflow.config
   tags:
     - kraken2
     - kraken2/kraken2
   files:
     - path: output/kraken2/test.classified.fastq.gz
-      should_exist: true
-    - path: output/kraken2/test.unclassified.fastq.gz
-      should_exist: true
     - path: output/kraken2/test.kraken2.report.txt
       md5sum: 4227755fe40478b8d7dc8634b489761e
+    - path: output/kraken2/test.unclassified.fastq.gz
+    - path: output/kraken2/versions.yml
+      md5sum: 6e3ad947ac8dee841a89216071c181cc
+    - path: output/untar/versions.yml
 
-- name: kraken2 kraken2 paired-end
-  command: nextflow run ./tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/kraken2/kraken2/nextflow.config
+- name: kraken2 kraken2 test_kraken2_kraken2_paired_end
+  command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_paired_end -c tests/config/nextflow.config
   tags:
     - kraken2
     - kraken2/kraken2
   files:
     - path: output/kraken2/test.classified_1.fastq.gz
-      should_exist: true
     - path: output/kraken2/test.classified_2.fastq.gz
-      should_exist: true
-    - path: output/kraken2/test.unclassified_1.fastq.gz
-      should_exist: true
-    - path: output/kraken2/test.unclassified_2.fastq.gz
-      should_exist: true
     - path: output/kraken2/test.kraken2.report.txt
       md5sum: 4227755fe40478b8d7dc8634b489761e
+    - path: output/kraken2/test.unclassified_1.fastq.gz
+    - path: output/kraken2/test.unclassified_2.fastq.gz
+    - path: output/kraken2/versions.yml
+      md5sum: 604482fe7a4519f890fae9c8beb1bd6e
+    - path: output/untar/versions.yml
+
+- name: kraken2 kraken2 test_kraken2_kraken2_classifyreads
+  command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_classifyreads -c tests/config/nextflow.config
+  tags:
+    - kraken2
+    - kraken2/kraken2
+  files:
+    - path: output/kraken2/test.kraken2.classifiedreads.txt
+      md5sum: e7a90531f0d8d777316515c36fe4cae0
+    - path: output/kraken2/test.kraken2.report.txt
+      md5sum: 4227755fe40478b8d7dc8634b489761e
+    - path: output/kraken2/versions.yml
+      md5sum: 3488c304259e83c5bea573403293fce9
+    - path: output/untar/versions.yml