From 5b80c7cab9e65702338f2c93e1f9dcfe06ecba28 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 27 Jun 2022 08:58:28 -0500 Subject: [PATCH 1/5] Install KAIJU_KAIJU2KRONA --- modules.json | 3 ++ .../nf-core/modules/kaiju/kaiju2krona/main.nf | 39 ++++++++++++++++ .../modules/kaiju/kaiju2krona/meta.yml | 44 +++++++++++++++++++ 3 files changed, 86 insertions(+) create mode 100644 modules/nf-core/modules/kaiju/kaiju2krona/main.nf create mode 100644 modules/nf-core/modules/kaiju/kaiju2krona/meta.yml diff --git a/modules.json b/modules.json index 4956cc8..1770a53 100644 --- a/modules.json +++ b/modules.json @@ -42,6 +42,9 @@ "kaiju/kaiju": { "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe" }, + "kaiju/kaiju2krona": { + "git_sha": "2f0b19240430de6807b1232e6d9d0e8084e8a28f" + }, "kaiju/kaiju2table": { "git_sha": "538dbac98ba9c8f799536cd5a617195501439457" }, diff --git a/modules/nf-core/modules/kaiju/kaiju2krona/main.nf b/modules/nf-core/modules/kaiju/kaiju2krona/main.nf new file mode 100644 index 0000000..c95d5a7 --- /dev/null +++ b/modules/nf-core/modules/kaiju/kaiju2krona/main.nf @@ -0,0 +1,39 @@ +process KAIJU_KAIJU2KRONA { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1': + 'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }" + + input: + tuple val(meta), path(tsv) + path(db) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + dbnodes=`find -L ${db} -name "*nodes.dmp"` + dbnames=`find -L ${db} -name "*names.dmp"` + kaiju2krona \\ + $args \\ + -t \$dbnodes \\ + -n \$dbnames \\ + -i ${tsv} \\ + -o ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml b/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml new file mode 100644 index 0000000..a0dc2fd --- /dev/null +++ b/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml @@ -0,0 +1,44 @@ +name: kaiju_kaiju2krona +description: Convert Kaiju's tab-separated output file into a tab-separated text file which can be imported into Krona. +keywords: + - taxonomy + - visualisation + - krona chart + - metagenomics +tools: + - "kaiju": + description: Fast and sensitive taxonomic classification for metagenomics + homepage: https://kaiju.binf.ku.dk/ + documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md + tool_dev_url: https://github.com/bioinformatics-centre/kaiju + doi: "10.1038/ncomms11257" + licence: ["GNU GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tsv: + type: file + description: Kaiju tab-separated output file + pattern: "*.{tsv,txt}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - txt: + type: file + description: Krona text-based input file converted from Kaiju report + pattern: "*.{txt,krona}" + +authors: + - "@MillironX" From 821dd844d8e7d87db095b2dc18ddb98cd01a81b4 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 27 Jun 2022 09:23:20 -0500 Subject: [PATCH 2/5] Add Kaiju profile conversion to visualization workflow --- subworkflows/local/visualization_krona.nf | 22 ++++++++++++++++++++++ workflows/taxprofiler.nf | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/visualization_krona.nf b/subworkflows/local/visualization_krona.nf index b9f645d..31dd5bc 100644 --- a/subworkflows/local/visualization_krona.nf +++ b/subworkflows/local/visualization_krona.nf @@ -2,6 +2,7 @@ // Create Krona visualizations // +include { KAIJU_KAIJU2KRONA } from '../../modules/nf-core/modules/kaiju/kaiju2krona/main' include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/nf-core/modules/krakentools/kreport2krona/main' include { KRONA_CLEANUP } from '../../modules/local/krona_cleanup' include { KRONA_KTIMPORTTEXT } from '../../modules/nf-core/modules/krona/ktimporttext/main' @@ -9,6 +10,7 @@ include { KRONA_KTIMPORTTEXT } from '../../modules/nf-core/modules/krona/ workflow VISUALIZATION_KRONA { take: profiles + databases main: ch_krona_text = Channel.empty() @@ -21,6 +23,7 @@ workflow VISUALIZATION_KRONA { ch_input_profiles = profiles .branch { centrifuge: it[0]['tool'] == 'centrifuge' + kaiju: it[0]['tool'] == 'kaiju' kraken2: it[0]['tool'] == 'kraken2' unknown: true } @@ -34,6 +37,25 @@ workflow VISUALIZATION_KRONA { ch_krona_text = ch_krona_text.mix( KRAKENTOOLS_KREPORT2KRONA.out.txt ) ch_versions = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() ) + /* + Combine Kaiju profiles with their databases + */ + ch_input_for_kaiju2krona = ch_input_profiles.kaiju + .map{ [it[0]['db_name'], it[0], it[1]] } + .combine( databases.map{ [it[0]['db_name'], it[1]] }, by: 0 ) + .multiMap{ + it -> + profiles: [it[1], it[2]] + db: it[3] + } + + /* + Convert Kaiju formatted reports into Krona text files + */ + KAIJU_KAIJU2KRONA( ch_input_for_kaiju2krona.profiles, ch_input_for_kaiju2krona.db ) + ch_krona_text = ch_krona_text.mix( KAIJU_KAIJU2KRONA.out.txt ) + ch_versions = ch_versions.mix( KAIJU_KAIJU2KRONA.out.versions.first() ) + /* Remove taxonomy level annotations from the Krona text files */ diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index f7ef07a..79a2bfd 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -214,7 +214,7 @@ workflow TAXPROFILER { SUBWORKFLOW: VISUALIZATION_KRONA */ if ( params.run_krona ) { - VISUALIZATION_KRONA ( PROFILING.out.profiles ) + VISUALIZATION_KRONA ( PROFILING.out.profiles, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions ) } From 4de43040a3403dae9b85679cdda9719ff2bb7117 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 27 Jun 2022 12:12:16 -0500 Subject: [PATCH 3/5] Switch to using raw classifications for Kaiju2Krona --- subworkflows/local/profiling.nf | 25 ++++++++++++++--------- subworkflows/local/visualization_krona.nf | 9 ++++++-- workflows/taxprofiler.nf | 2 +- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index de5bea1..45c49f5 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -21,7 +21,8 @@ workflow PROFILING { main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - ch_raw_profiles = Channel.empty() + ch_raw_classifications = Channel.empty() + ch_raw_profiles = Channel.empty() /* COMBINE READS WITH POSSIBLE DATABASES @@ -110,6 +111,7 @@ workflow PROFILING { MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary ) ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan ) ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt ) } @@ -124,9 +126,10 @@ workflow PROFILING { } KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification ) - ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) - ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment ) + ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report ) } @@ -145,8 +148,9 @@ workflow PROFILING { CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads ) CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db) - ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) - ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport ) + ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results ) + ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport ) } @@ -182,6 +186,7 @@ workflow PROFILING { KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name) ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results ) ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary ) } @@ -206,8 +211,8 @@ workflow PROFILING { } emit: - profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom - versions = ch_versions // channel: [ versions.yml ] - mqc = ch_multiqc_files + classifications = ch_raw_classifications + profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files } - diff --git a/subworkflows/local/visualization_krona.nf b/subworkflows/local/visualization_krona.nf index 31dd5bc..c5ca97a 100644 --- a/subworkflows/local/visualization_krona.nf +++ b/subworkflows/local/visualization_krona.nf @@ -9,6 +9,7 @@ include { KRONA_KTIMPORTTEXT } from '../../modules/nf-core/modules/krona/ workflow VISUALIZATION_KRONA { take: + classifications profiles databases @@ -23,10 +24,14 @@ workflow VISUALIZATION_KRONA { ch_input_profiles = profiles .branch { centrifuge: it[0]['tool'] == 'centrifuge' - kaiju: it[0]['tool'] == 'kaiju' kraken2: it[0]['tool'] == 'kraken2' unknown: true } + ch_input_classifications = classifications + .branch { + kaiju: it[0]['tool'] == 'kaiju' + unknown: true + } /* Convert Kraken2 formatted reports into Krona text files @@ -40,7 +45,7 @@ workflow VISUALIZATION_KRONA { /* Combine Kaiju profiles with their databases */ - ch_input_for_kaiju2krona = ch_input_profiles.kaiju + ch_input_for_kaiju2krona = ch_input_classifications.kaiju .map{ [it[0]['db_name'], it[0], it[1]] } .combine( databases.map{ [it[0]['db_name'], it[1]] }, by: 0 ) .multiMap{ diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 79a2bfd..7eec13d 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -214,7 +214,7 @@ workflow TAXPROFILER { SUBWORKFLOW: VISUALIZATION_KRONA */ if ( params.run_krona ) { - VISUALIZATION_KRONA ( PROFILING.out.profiles, DB_CHECK.out.dbs ) + VISUALIZATION_KRONA ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions ) } From 069370db8610aedd557438b0f071771cf97832fd Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 27 Jun 2022 13:39:42 -0500 Subject: [PATCH 4/5] Add unclassified and verbose flags to kaiju2krona --- conf/modules.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index f9d0329..6563cc9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -351,6 +351,10 @@ process { ] } + withName: KAIJU_KAIJU2KRONA { + ext.args = '-v -u' + } + withName: DIAMOND_BLASTX { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } From 6673ccf8d8bb0883d5b066e16ac59293830a80c1 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 28 Jun 2022 13:13:37 +0000 Subject: [PATCH 5/5] Update channel assignment alignment in profiling subworkflow Co-authored-by: James A. Fellows Yates --- subworkflows/local/profiling.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 45c49f5..c10ef5b 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -109,10 +109,10 @@ workflow PROFILING { } MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary ) - ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan ) - ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt ) + ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt ) }