From 87a1d8051979040f7dd1b6f976b2bd0224a7bf24 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sat, 7 May 2022 05:22:35 +0200 Subject: [PATCH 01/27] Add working output for Kraken2/Centrifuge/DIAMOND --- conf/modules.config | 4 +- conf/test.config | 4 ++ modules.json | 6 +- .../nf-core/modules/diamond/blastx/main.nf | 33 ++++++--- .../nf-core/modules/diamond/blastx/meta.yml | 30 +++++++- .../nf-core/modules/kraken2/kraken2/main.nf | 23 ++++-- .../nf-core/modules/kraken2/kraken2/meta.yml | 25 +++++-- nextflow.config | 14 ++-- nextflow_schema.json | 71 ++++++++++++++----- subworkflows/local/profiling.nf | 34 ++++++--- workflows/taxprofiler.nf | 2 + 11 files changed, 181 insertions(+), 65 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index cd0fb04..31d0fca 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -271,7 +271,7 @@ process { publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, - pattern: '*.{txt}' + pattern: '*.{txt,report,fastq.gz}' ] } @@ -289,7 +289,7 @@ process { publishDir = [ path: { "${params.outdir}/centrifuge/${meta.db_name}" }, mode: params.publish_dir_mode, - pattern: '*.txt' + pattern: '*.{txt,sam,gz}' ] ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } diff --git a/conf/test.config b/conf/test.config index a5244f9..573db42 100644 --- a/conf/test.config +++ b/conf/test.config @@ -36,6 +36,10 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + malt_save_reads = true + kraken2_save_reads = true + centrifuge_save_reads = true + diamond_save_reads = true } process { diff --git a/modules.json b/modules.json index a55c88b..5cad32e 100644 --- a/modules.json +++ b/modules.json @@ -28,7 +28,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "diamond/blastx": { - "git_sha": "42564565b934eeb2449e35ec97ed13ff2a67f1de" + "git_sha": "bd3bfe0817246082525ab93707976676b1fe208b" }, "fastp": { "git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789" @@ -43,7 +43,7 @@ "git_sha": "538dbac98ba9c8f799536cd5a617195501439457" }, "kraken2/kraken2": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "abe025677cdd805cc93032341ab19885473c1a07" }, "malt/run": { "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b" @@ -80,4 +80,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/diamond/blastx/main.nf b/modules/nf-core/modules/diamond/blastx/main.nf index 6703c1e..d327227 100644 --- a/modules/nf-core/modules/diamond/blastx/main.nf +++ b/modules/nf-core/modules/diamond/blastx/main.nf @@ -2,21 +2,26 @@ process DIAMOND_BLASTX { tag "$meta.id" label 'process_medium' - // Dimaond is limited to v2.0.9 because there is not a - // singularity version higher than this at the current time. - conda (params.enable_conda ? "bioconda::diamond=2.0.9" : null) + conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' : - 'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }" + 'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' : + 'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }" input: tuple val(meta), path(fasta) path db - val outext + val out_ext + val blast_columns output: - tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output - path "versions.yml" , emit: versions + tuple val(meta), path('*.blast'), optional: true, emit: blast + tuple val(meta), path('*.xml') , optional: true, emit: xml + tuple val(meta), path('*.txt') , optional: true, emit: txt + tuple val(meta), path('*.daa') , optional: true, emit: daa + tuple val(meta), path('*.sam') , optional: true, emit: sam + tuple val(meta), path('*.tsv') , optional: true, emit: tsv + tuple val(meta), path('*.paf') , optional: true, emit: paf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,7 +29,8 @@ process DIAMOND_BLASTX { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - switch ( outext ) { + def columns = blast_columns ? "${blast_columns}" : '' + switch ( out_ext ) { case "blast": outfmt = 0; break case "xml": outfmt = 5; break case "txt": outfmt = 6; break @@ -32,6 +38,11 @@ process DIAMOND_BLASTX { case "sam": outfmt = 101; break case "tsv": outfmt = 102; break case "paf": outfmt = 103; break + default: + outfmt = '6'; + out_ext = 'txt'; + log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)"); + break } """ DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'` @@ -41,9 +52,9 @@ process DIAMOND_BLASTX { --threads $task.cpus \\ --db \$DB \\ --query $fasta \\ - --outfmt ${outfmt} \\ + --outfmt ${outfmt} ${columns} \\ $args \\ - --out ${prefix}.${outext} + --out ${prefix}.${out_ext} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/diamond/blastx/meta.yml b/modules/nf-core/modules/diamond/blastx/meta.yml index 5ee2d55..2dcd7bc 100644 --- a/modules/nf-core/modules/diamond/blastx/meta.yml +++ b/modules/nf-core/modules/diamond/blastx/meta.yml @@ -28,7 +28,7 @@ input: type: directory description: Directory containing the nucelotide blast database pattern: "*" - - outext: + - out_ext: type: string description: | Specify the type of output file to be generated. `blast` corresponds to @@ -38,10 +38,34 @@ input: pattern: "blast|xml|txt|daa|sam|tsv|paf" output: + - blast: + type: file + description: File containing blastp hits + pattern: "*.{blast}" + - xml: + type: file + description: File containing blastp hits + pattern: "*.{xml}" - txt: type: file - description: File containing blastx hits - pattern: "*.{blastx.txt}" + description: File containing hits in tabular BLAST format. 
+ pattern: "*.{txt}" + - daa: + type: file + description: File containing hits DAA format + pattern: "*.{daa}" + - sam: + type: file + description: File containing aligned reads in SAM format + pattern: "*.{sam}" + - tsv: + type: file + description: Tab separated file containing taxonomic classification of hits + pattern: "*.{tsv}" + - paf: + type: file + description: File containing aligned reads in pairwise mapping format format + pattern: "*.{paf}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/kraken2/kraken2/main.nf b/modules/nf-core/modules/kraken2/kraken2/main.nf index 3ec5df5..d400023 100644 --- a/modules/nf-core/modules/kraken2/kraken2/main.nf +++ b/modules/nf-core/modules/kraken2/kraken2/main.nf @@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 { input: tuple val(meta), path(reads) path db + val save_output_fastqs + val save_reads_assignment output: - tuple val(meta), path('*classified*') , emit: classified - tuple val(meta), path('*unclassified*'), emit: unclassified - tuple val(meta), path('*report.txt') , emit: txt - path "versions.yml" , emit: versions + tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq + tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq + tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment + tuple val(meta), path('*report.txt') , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 { def paired = meta.single_end ? "" : "--paired" def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" + def classified_command = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" + def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "" + def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : "" + """ kraken2 \\ --db $db \\ --threads $task.cpus \\ - --unclassified-out $unclassified \\ - --classified-out $classified \\ --report ${prefix}.kraken2.report.txt \\ --gzip-compressed \\ + $unclassified_command \\ + $classified_command \\ + $readclassification_command \\ $paired \\ $args \\ $reads - pigz -p $task.cpus *.fastq + $compress_reads_command cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/kraken2/kraken2/meta.yml b/modules/nf-core/modules/kraken2/kraken2/meta.yml index 9d6a385..7129fe3 100644 --- a/modules/nf-core/modules/kraken2/kraken2/meta.yml +++ b/modules/nf-core/modules/kraken2/kraken2/meta.yml @@ -27,25 +27,40 @@ input: - db: type: directory description: Kraken2 database + - save_output_fastqs: + type: boolean + description: | + If true, optional commands are added to save classified and unclassified reads + as fastq files + - save_reads_assignment: + type: boolean + description: | + If true, an optional command is added to save a file reporting the taxonomic + classification of each input read output: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - classified: + - classified_reads_fastq: type: file description: | - Reads classified to belong to any of the taxa + Reads classified as belonging to any of the taxa on the Kraken2 database. pattern: "*{fastq.gz}" - - unclassified: + - unclassified_reads_fastq: type: file description: | - Reads not classified to belong to any of the taxa + Reads not classified to any of the taxa on the Kraken2 database. pattern: "*{fastq.gz}" - - txt: + - classified_reads_assignment: + type: file + description: | + Kraken2 output file indicating the taxonomic assignment of + each input read + - report: type: file description: | Kraken2 report containing stats about classified diff --git a/nextflow.config b/nextflow.config index ca9e280..3f76d53 100644 --- a/nextflow.config +++ b/nextflow.config @@ -94,16 +94,17 @@ params { // MALT run_malt = false malt_mode = 'BlastN' - malt_generatemegansummary = false + malt_generate_megansummary = false + malt_save_reads = false // kraken2 - run_kraken2 = false + run_kraken2 = false + kraken2_save_reads = false + kraken2_save_readclassification = false // centrifuge run_centrifuge = false - centrifuge_save_unaligned = false - centrifuge_save_aligned = false - centrifuge_sam_format = false + centrifuge_save_reads = false // metaphlan3 run_metaphlan3 = false @@ -114,7 +115,8 @@ params { // diamond run_diamond = false - diamond_output_format = 'txt' + diamond_output_format = 'tsv' // TSV is only format with taxonomic information apparently + diamond_save_reads = false // this will override diamound output format so no taxonomic profile is generated! } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index ab2108e..bb4b759 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -173,7 +176,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -278,15 +288,6 @@ "run_centrifuge": { "type": "boolean" }, - "centrifuge_save_unaligned": { - "type": "boolean" - }, - "centrifuge_save_aligned": { - "type": "boolean" - }, - "centrifuge_sam_format": { - "type": "boolean" - }, "run_metaphlan3": { "type": "boolean", "description": "Enable MetaPhlAn for taxonomic profiling" @@ -294,7 +295,10 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"] + "enum": [ + "fastp", + "adapterremoval" + ] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -335,7 +339,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"] + "enum": [ + "entropy", + "dust" + ] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -385,13 +392,20 @@ "run_kaiju": { "type": "boolean" }, - "malt_generatemegansummary": { + "malt_generate_megansummary": { "type": "boolean" }, "kaiju_taxon_name": { "type": "string", "default": "species", - "enum": ["phylum", "class", "order", "family", "genus", "species"] + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ] }, "run_diamond": { "type": "boolean" @@ -399,11 +413,34 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"] + "enum": [ + "blast", + "xml", + "txt", + "daa", + "sam", + "tsv", + "paf" + ] }, "longread_hostremoval_index": { "type": "string", "default": "None" + }, + "malt_save_reads": { + "type": "boolean" + }, + "kraken2_save_reads": { + "type": "boolean" + }, + "kraken2_save_readclassification": { + "type": "boolean" + }, + "centrifuge_save_reads": { + "type": "boolean" + }, + "diamond_save_reads": { + "type": "boolean" } } -} +} \ No newline at end of file diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 7fb3ce9..18ea7fa 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -65,10 +65,18 @@ workflow PROFILING { ch_input_for_malt = ch_input_for_profiling.malt .filter { it[0]['instrument_platform'] == 'ILLUMINA' } .map { - it -> - def temp_meta = [ id: it[2]['db_name']] + it[2] - def db = it[3] - [ temp_meta, it[1], db ] + meta, reads, db_meta, db -> + def sam_format = params.malt_save_reads ? ' --alignments' : "" + // TODO No MALT SAM? + // TODO check all aligned reads published + // TODO try turning on/off aligned reads + // TODO wut? 
[9a/a441d6] Submitted process > NFCORE_TAXPROFILER:TAXPROFILER:PROFILING:MALT_RUN (null) + def temp_meta = [ id: meta['db_name'] ] + def new_db_meta = db_meta.clone() + new_db_meta['db_params'] = db_meta['db_params'] + sam_format + def new_meta = temp_meta + new_db_meta + + [ new_meta, reads, db ] } .groupTuple(by: [0,2]) .multiMap { @@ -92,7 +100,7 @@ workflow PROFILING { [ meta_new, rma ] } - MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary ) + MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary ) ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt ) @@ -108,10 +116,10 @@ workflow PROFILING { db: it[3] } - KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db ) - ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) + KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) - ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt ) + ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report ) } @@ -128,7 +136,7 @@ workflow PROFILING { db: it[3] } - CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format ) + CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads ) CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db) ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport ) @@ -180,9 +188,13 @@ workflow PROFILING { db: it[3] } - DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format ) + // diamond only accepts single output file specification, therefore + // this will replace output file! + ch_diamond_reads_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format + + DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format, [] ) ch_versions = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() ) - ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.output ) + ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.tsv ) } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 7a6cd09..c319296 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -29,6 +29,8 @@ if (params.hostremoval_reference ) { ch_reference = file(params.hostre if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] } if (params.longread_hostremoval_index ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] } +if (params.diamond_save_reads ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. Only aligned reads in SAM format will be produced, no taxonomic profiles will be available." 
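Illustrative sketch, not an additional hunk of the diff above: DIAMOND writes only the single file named by --out, which is why saving aligned reads necessarily replaces the taxonomic profile. With the parameter names defined in nextflow.config, and the numeric codes restating the switch statement in modules/nf-core/modules/diamond/blastx/main.nf, the format selection this patch series converges on is roughly:

    // Map the requested extension to DIAMOND's numeric --outfmt code,
    // defaulting to tabular BLAST output (6, i.e. 'txt') for unknown values.
    def codes          = [ blast: 0, xml: 5, txt: 6, daa: 100, sam: 101, tsv: 102, paf: 103 ]
    // --diamond_save_reads forces SAM output, so only aligned reads are written
    // and no taxonomic profile is produced, hence the warning emitted above.
    def diamond_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format
    def outfmt         = codes.containsKey( diamond_format ) ? codes[ diamond_format ] : 6
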
+ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES From 2838c136bda47ad24d36b5ebb174998d58c57348 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 12:51:39 +0200 Subject: [PATCH 02/27] Prettier --- modules.json | 2 +- nextflow_schema.json | 45 +++++++------------------------------------- 2 files changed, 8 insertions(+), 39 deletions(-) diff --git a/modules.json b/modules.json index 5cad32e..9d918b3 100644 --- a/modules.json +++ b/modules.json @@ -80,4 +80,4 @@ } } } -} \ No newline at end of file +} diff --git a/nextflow_schema.json b/nextflow_schema.json index e7b495d..f0093b1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -295,10 +285,7 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -340,10 +327,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ] + "enum": ["entropy", "dust"] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -399,14 +383,7 @@ "kaiju_taxon_name": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ] + "enum": ["phylum", "class", "order", "family", "genus", "species"] }, "run_diamond": { "type": "boolean" @@ -414,15 +391,7 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": [ - "blast", - "xml", - "txt", - "daa", - "sam", - "tsv", - "paf" - ] + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"] }, "longread_hostremoval_index": { "type": "string", @@ -448,4 +417,4 @@ "type": "boolean" } } -} \ No newline at end of file +} From 3313b90453ed0e4bf2571a28d1436427a05b90f7 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 3 Jun 2022 13:50:48 +0200 Subject: [PATCH 03/27] Fix JSOn schema --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index a82ff9e..2c00348 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -418,7 +418,7 @@ "longread_qc_target_bases": { "type": "integer", "default": 500000000 - } + }, "malt_save_reads": { "type": "boolean" }, From d48b3be5a780d87b30c8f9ce189a062676cc601a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 3 Jun 2022 13:55:17 +0200 Subject: [PATCH 04/27] Tweak DIAMOND save_reads message --- nextflow.config | 2 +- workflows/taxprofiler.nf | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 11cbfc0..e5b8a1d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -124,7 +124,7 @@ params { // diamond run_diamond = false diamond_output_format = 'tsv' // TSV is only format with taxonomic information apparently - diamond_save_reads = false // this will override diamound output format so no taxonomic profile is generated! + diamond_save_reads = false // this will override default diamond output format so no taxonomic profile is generated! } // Load base.config by default for all pipelines diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 6a05dbe..f29a366 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -33,7 +33,7 @@ if (params.hostremoval_reference ) { ch_reference = file(params.hostre if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] } if (params.longread_hostremoval_index ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] } -if (params.diamond_save_reads ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. Only aligned reads in SAM format will be produced, no taxonomic profiles will be available." +if (params.diamond_save_reads ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads supplied, only aligned reads in SAM format will be produced, no taxonomic profiles will be available." /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 9462032d00824f16c55fe4a313404282eddab91d Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 3 Jun 2022 22:29:04 +0200 Subject: [PATCH 05/27] Fix MALT save_alignemnts reads and some clean up --- conf/test.config | 3 +++ nextflow.config | 2 +- subworkflows/local/longread_preprocessing.nf | 2 +- subworkflows/local/profiling.nf | 10 ++++------ 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/conf/test.config b/conf/test.config index 996d5de..e9fa62e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -47,4 +47,7 @@ process { withName: MALT_RUN { maxForks = 1 } + withName: MEGAN_RMA2INFO { + maxForks = 1 + } } diff --git a/nextflow.config b/nextflow.config index e5b8a1d..73fd0b3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -196,7 +196,7 @@ profiles { test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } test_noprofiling { includeConfig 'conf/test_noprofiling.config' } - test_nopreprocessing { includeConfig 'conf/test_preprocessing.config' } + test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' } } // Load igenomes.config if required diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 5ae5417..6a23b0e 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -48,7 +48,7 @@ workflow LONGREAD_PREPROCESSING { } - FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") ) + FASTQC_PROCESSED ( ch_processed_reads ) ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) emit: diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 18ea7fa..b83c78f 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -66,19 +66,17 @@ workflow PROFILING { .filter { 
it[0]['instrument_platform'] == 'ILLUMINA' } .map { meta, reads, db_meta, db -> - def sam_format = params.malt_save_reads ? ' --alignments' : "" - // TODO No MALT SAM? - // TODO check all aligned reads published - // TODO try turning on/off aligned reads + def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : "" // TODO wut? [9a/a441d6] Submitted process > NFCORE_TAXPROFILER:TAXPROFILER:PROFILING:MALT_RUN (null) def temp_meta = [ id: meta['db_name'] ] def new_db_meta = db_meta.clone() new_db_meta['db_params'] = db_meta['db_params'] + sam_format def new_meta = temp_meta + new_db_meta - + new_meta['id'] = new_meta['db_name'] [ new_meta, reads, db ] } .groupTuple(by: [0,2]) + .dump(tag: "into_malt") .multiMap { it -> reads: [ it[0], it[1].flatten() ] @@ -192,7 +190,7 @@ workflow PROFILING { // this will replace output file! ch_diamond_reads_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format - DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format, [] ) + DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, ch_diamond_reads_format , [] ) ch_versions = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.tsv ) From ebdd5683b248d2664fbd41fdc32cb5bf06e2e4ab Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 3 Jun 2022 22:33:48 +0200 Subject: [PATCH 06/27] Apply suggestions from code review --- subworkflows/local/profiling.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index b83c78f..9bae127 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -67,7 +67,6 @@ workflow PROFILING { .map { meta, reads, db_meta, db -> def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : "" - // TODO wut? 
[9a/a441d6] Submitted process > NFCORE_TAXPROFILER:TAXPROFILER:PROFILING:MALT_RUN (null) def temp_meta = [ id: meta['db_name'] ] def new_db_meta = db_meta.clone() new_db_meta['db_params'] = db_meta['db_params'] + sam_format @@ -76,7 +75,6 @@ workflow PROFILING { [ new_meta, reads, db ] } .groupTuple(by: [0,2]) - .dump(tag: "into_malt") .multiMap { it -> reads: [ it[0], it[1].flatten() ] From 490a8a8a840566a53ac252b107c3c1fff207c61c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 9 Jun 2022 08:15:58 +0200 Subject: [PATCH 07/27] Remove dump --- subworkflows/local/profiling.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index b83c78f..03f0bf5 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -76,7 +76,6 @@ workflow PROFILING { [ new_meta, reads, db ] } .groupTuple(by: [0,2]) - .dump(tag: "into_malt") .multiMap { it -> reads: [ it[0], it[1].flatten() ] From ec13b8d608c5a95e9a2e27ae6c2b76691d5b0448 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 9 Jun 2022 08:21:48 +0200 Subject: [PATCH 08/27] Remove support for uncompressed FASTQ files --- bin/check_samplesheet.py | 2 +- docs/usage.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index d10ee90..47c6452 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -48,7 +48,7 @@ def check_samplesheet(file_in, file_out): 2613,ERR5766181,ILLUMINA,ERX5474937_ERR5766181_1.fastq.gz,ERX5474937_ERR5766181_2.fastq.gz, """ - FQ_EXTENSIONS = (".fq", ".fq.gz", ".fastq", ".fastq.gz") + FQ_EXTENSIONS = (".fq.gz", ".fastq.gz") FA_EXTENSIONS = ( ".fa", ".fa.gz", diff --git a/docs/usage.md b/docs/usage.md index 54ffce0..8b40745 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,6 +12,8 @@ nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers). +> ⚠️ Input FASTQ files _must_ be gzipped, while FASTA files may optionally be uncompressed (although this is not recommended) + You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermother, nf-core/taxprofiler also requires a second comma-separated file of 3 columns with a header row as in the examples below. This samplesheet is then specified on the command line as follows: From 621b6a3d092e00a11086444083c95544ad8781b8 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 10 Jun 2022 11:27:51 +0200 Subject: [PATCH 09/27] Add comments about MALT id replacement and refactor for simplicity --- subworkflows/local/profiling.nf | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 9bae127..7d35837 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -66,13 +66,20 @@ workflow PROFILING { .filter { it[0]['instrument_platform'] == 'ILLUMINA' } .map { meta, reads, db_meta, db -> - def sam_format = params.malt_save_reads ? 
' --alignments ./ -za false' : "" - def temp_meta = [ id: meta['db_name'] ] + def new_meta = meta.clone() def new_db_meta = db_meta.clone() + + // Add the saving of alignments in SAM format to params + def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : "" new_db_meta['db_params'] = db_meta['db_params'] + sam_format - def new_meta = temp_meta + new_db_meta - new_meta['id'] = new_meta['db_name'] - [ new_meta, reads, db ] + + // As MALT has huge databases, we don't run on a per-sample basis but multiple + // samples at once. This replaces the ID of the particular process with the + // db_name instead to prevent `null` in job name, and in publishDir) + def updated_meta = new_meta + new_db_meta + updated_meta['id'] = updated_meta['db_name'] + + [ updated_meta, reads, db ] } .groupTuple(by: [0,2]) .multiMap { From e0ad49ebc9f22120ff001c08f3b21facf6038c22 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 10 Jun 2022 19:33:37 +0200 Subject: [PATCH 10/27] Fix metadata manipulaton for malt --- subworkflows/local/profiling.nf | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 7d35837..de5bea1 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -61,25 +61,29 @@ workflow PROFILING { // MALT: We groupTuple to have all samples in one channel for MALT as database // loading takes a long time, so we only want to run it once per database - // TODO document somewhere we only accept illumina short reads for MALT? ch_input_for_malt = ch_input_for_profiling.malt .filter { it[0]['instrument_platform'] == 'ILLUMINA' } .map { meta, reads, db_meta, db -> - def new_meta = meta.clone() + + // Reset entire input meta for MALT to just database name, + // as we don't run run on a per-sample basis due to huge datbaases + // so all samples are in one run and so sample-specific metadata + // unnecessary. Set as database name to prevent `null` job ID and prefix. + def temp_meta = [ id: meta['db_name'] ] + + // Extend database parameters to specify whether to save alignments or not def new_db_meta = db_meta.clone() - - // Add the saving of alignments in SAM format to params def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : "" new_db_meta['db_params'] = db_meta['db_params'] + sam_format - - // As MALT has huge databases, we don't run on a per-sample basis but multiple - // samples at once. This replaces the ID of the particular process with the - // db_name instead to prevent `null` in job name, and in publishDir) - def updated_meta = new_meta + new_db_meta - updated_meta['id'] = updated_meta['db_name'] - [ updated_meta, reads, db ] + // Combine reduced sample metadata with updated database parameters metadata, + // make sure id is db_name for publishing purposes. + def new_meta = temp_meta + new_db_meta + new_meta['id'] = new_meta['db_name'] + + [ new_meta, reads, db ] + } .groupTuple(by: [0,2]) .multiMap { From a84f693479340e5ae13f97576d76c42c7320d4c2 Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 08:13:36 -0500 Subject: [PATCH 11/27] Install KRAKENTOOLS_KREPORT2KRONA module --- modules.json | 3 ++ .../modules/krakentools/kreport2krona/main.nf | 36 ++++++++++++++++ .../krakentools/kreport2krona/meta.yml | 41 +++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 modules/nf-core/modules/krakentools/kreport2krona/main.nf create mode 100644 modules/nf-core/modules/krakentools/kreport2krona/meta.yml diff --git a/modules.json b/modules.json index a8f5a57..b9d9dba 100644 --- a/modules.json +++ b/modules.json @@ -48,6 +48,9 @@ "kraken2/kraken2": { "git_sha": "abe025677cdd805cc93032341ab19885473c1a07" }, + "krakentools/kreport2krona": { + "git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422" + }, "malt/run": { "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b" }, diff --git a/modules/nf-core/modules/krakentools/kreport2krona/main.nf b/modules/nf-core/modules/krakentools/kreport2krona/main.nf new file mode 100644 index 0000000..3bf46ee --- /dev/null +++ b/modules/nf-core/modules/krakentools/kreport2krona/main.nf @@ -0,0 +1,36 @@ +def VERSION = '1.2' // Version information not provided by tool on CLI + +process KRAKENTOOLS_KREPORT2KRONA { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::krakentools=1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0': + 'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }" + + input: + tuple val(meta), path(kreport) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + kreport2krona.py \\ + -r ${kreport} \\ + -o ${prefix}.txt \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kreport2krona.py: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/krakentools/kreport2krona/meta.yml b/modules/nf-core/modules/krakentools/kreport2krona/meta.yml new file mode 100644 index 0000000..2f8a163 --- /dev/null +++ b/modules/nf-core/modules/krakentools/kreport2krona/meta.yml @@ -0,0 +1,41 @@ +name: krakentools_kreport2krona +description: Takes a Kraken report file and prints out a krona-compatible TEXT file +keywords: + - kraken + - krona + - metagenomics + - visualization +tools: + - krakentools: + description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs. + homepage: https://github.com/jenniferlu717/KrakenTools + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - kreport: + type: file + description: Kraken report + pattern: "*.{txt,kreport}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - krona: + type: file + description: Krona text-based input file converted from Kraken report + pattern: "*.{txt,krona}" + +authors: + - "@MillironX" From 87678c127bb5ed16c05f860ff2df59a7c4524f19 Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 08:23:47 -0500 Subject: [PATCH 12/27] Add KRONA_CLEANUP process module --- modules/local/krona_cleanup.nf | 40 ++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 modules/local/krona_cleanup.nf diff --git a/modules/local/krona_cleanup.nf b/modules/local/krona_cleanup.nf new file mode 100644 index 0000000..804cb69 --- /dev/null +++ b/modules/local/krona_cleanup.nf @@ -0,0 +1,40 @@ +process KRONA_CLEANUP { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : + 'biocontainers/biocontainers:v1.2.0_cv1' }" + + input: + tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt') + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # Copy the file to a new name + cp ${krona} ${prefix}.txt + + # Remove ugly 'x__' prefixes for each of the taxonomic levels + LEVELS=(d k p c o f g s) + for L in "\${LEVELS[@]}"; do + sed -i "s/\${L}__//g" ${prefix}.txt + done + + # Remove underscores that are standing in place of spaces + sed -i "s/_/ /g" ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} From 5da8d5b7418cc778284f64110f7fe721a29aa148 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 08:27:42 -0500 Subject: [PATCH 13/27] Install KRONA_KTIMPORTTEXT --- modules.json | 3 ++ .../modules/krona/ktimporttext/main.nf | 34 ++++++++++++++ .../modules/krona/ktimporttext/meta.yml | 47 +++++++++++++++++++ 3 files changed, 84 insertions(+) create mode 100644 modules/nf-core/modules/krona/ktimporttext/main.nf create mode 100644 modules/nf-core/modules/krona/ktimporttext/meta.yml diff --git a/modules.json b/modules.json index b9d9dba..4956cc8 100644 --- a/modules.json +++ b/modules.json @@ -51,6 +51,9 @@ "krakentools/kreport2krona": { "git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422" }, + "krona/ktimporttext": { + "git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2" + }, "malt/run": { "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b" }, diff --git a/modules/nf-core/modules/krona/ktimporttext/main.nf b/modules/nf-core/modules/krona/ktimporttext/main.nf new file mode 100644 index 0000000..de0cfc2 --- /dev/null +++ b/modules/nf-core/modules/krona/ktimporttext/main.nf @@ -0,0 +1,34 @@ +process KRONA_KTIMPORTTEXT { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::krona=2.8.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1': + 'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }" + + input: + tuple val(meta), path(report) + + output: + tuple val(meta), path ('*.html'), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + ktImportText \\ + $args \\ + -o ${prefix}.html \\ + $report + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/krona/ktimporttext/meta.yml b/modules/nf-core/modules/krona/ktimporttext/meta.yml new file mode 100644 index 0000000..a7108e0 --- /dev/null +++ b/modules/nf-core/modules/krona/ktimporttext/meta.yml @@ -0,0 +1,47 @@ +name: "krona_ktimporttext" +description: Creates a Krona chart from text files listing quantities and lineages. +keywords: + - plot + - taxonomy + - interactive + - html + - visualisation + - krona chart + - metagenomics +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html + tool_dev_url: https://github.com/marbl/Krona + doi: 10.1186/1471-2105-12-385 + licence: https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - report: + type: file + description: "Tab-delimited text file. Each line should be a number followed by a list of wedges to contribute to (starting from the highest level). If no wedges are listed (and just a quantity is given), it will contribute to the top level. If the same lineage is listed more than once, the values will be added. Quantities can be omitted if -q is specified. Lines beginning with '#' will be ignored." + pattern: "*.{txt}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - html: + type: file + description: A html file containing an interactive krona plot. + pattern: "*.{html}" + +authors: + - "@jianhong" From 4ff2145e44d0c5b98f0d90cc43813cb343b2ee55 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 20 Jun 2022 08:23:14 +0200 Subject: [PATCH 14/27] Roll back MALT version --- modules.json | 2 +- modules/nf-core/modules/malt/run/main.nf | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/modules.json b/modules.json index a8f5a57..9561b0f 100644 --- a/modules.json +++ b/modules.json @@ -49,7 +49,7 @@ "git_sha": "abe025677cdd805cc93032341ab19885473c1a07" }, "malt/run": { - "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b" + "git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5" }, "megan/rma2info": { "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece" diff --git a/modules/nf-core/modules/malt/run/main.nf b/modules/nf-core/modules/malt/run/main.nf index 4e2e50c..2b91d90 100644 --- a/modules/nf-core/modules/malt/run/main.nf +++ b/modules/nf-core/modules/malt/run/main.nf @@ -2,10 +2,10 @@ process MALT_RUN { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 
"bioconda::malt=0.53" : null) + conda (params.enable_conda ? "bioconda::malt=0.41" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/malt:0.53--hdfd78af_0' : - 'quay.io/biocontainers/malt:0.53--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/malt:0.41--1' : + 'quay.io/biocontainers/malt:0.41--1' }" input: tuple val(meta), path(fastqs) @@ -33,7 +33,6 @@ process MALT_RUN { """ malt-run \\ - -J-Xmx${avail_mem}g \\ -t $task.cpus \\ -v \\ -o . \\ From 4552b53e757518cd51791f4f841a068bd8896da9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 20 Jun 2022 08:30:23 +0200 Subject: [PATCH 15/27] Add note about MALT version to docs --- docs/usage.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 9c47898..1893c10 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -213,6 +213,12 @@ Activating this functionality will concatenate the FASTQ files with the same sam You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`. +##### Profiling + +###### MALT + +nf-core/taxprofiler uses MALT 0.4.1, which is a compatively old version. However it has been found that the most recent version of MALT (0.5.\*), at the time of writing, is broken. [The the LCA step appears not to be executed](http://megan.informatik.uni-tuebingen.de/t/lca-placement-failure-with-malt-v-0-5-2-and-0-5-3/1996/3), pushing all hits to the leaves of the taxonomy. However, if you need to use a more recent taxonomy map file with your databases, the output of `malt-build` from MALT 0.5.3 should be still be compatible with `malt-run` of 0.4.1. + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: From a40a442178487dce25aa9f10109098976868a198 Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 27 Jun 2022 08:22:47 -0500 Subject: [PATCH 16/27] Create VISUALIZATION_KRONA workflow to create Krona charts --- subworkflows/local/visualization_krona.nf | 55 +++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 subworkflows/local/visualization_krona.nf diff --git a/subworkflows/local/visualization_krona.nf b/subworkflows/local/visualization_krona.nf new file mode 100644 index 0000000..87db112 --- /dev/null +++ b/subworkflows/local/visualization_krona.nf @@ -0,0 +1,55 @@ +// +// Create Krona visualizations +// + +include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/nf-core/modules/krakentools/kreport2krona/main' +include { KRONA_CLEANUP } from '../../modules/local/krona_cleanup' +include { KRONA_KTIMPORTTEXT } from '../../modules/nf-core/modules/krona/ktimporttext/main' + +workflow VISUALIZATION_KRONA { + take: + profiles + + main: + ch_krona_text = Channel.empty() + ch_krona_html = Channel.empty() + ch_versions = Channel.empty() + + /* + Split profile results based on tool they come from + */ + ch_input_profiles = profiles + .branch { + kraken2: it[0]['tool'] == 'kraken2' + unknown: true + } + + /* + Convert Kraken2 formatted reports into Krona text files + */ + ch_kraken_reports = ch_input_profiles.kraken2 + KRAKENTOOLS_KREPORT2KRONA ( ch_kraken_reports ) + ch_krona_text = ch_krona_text.mix( KRAKENTOOLS_KREPORT2KRONA.out.txt ) + ch_versions = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() ) + + /* + Remove taxonomy level annotations from the Krona text files + */ + KRONA_CLEANUP( ch_krona_text ) + ch_cleaned_krona_text = KRONA_CLEANUP.out.txt + ch_versions = ch_versions.mix( KRONA_CLEANUP.out.versions.first() ) + + /* + Convert Krona text files into html Krona visualizations + */ + ch_krona_text_for_import = ch_cleaned_krona_text + .map{[[id: it[0]['db_name']], it[1]]} + .groupTuple() + KRONA_KTIMPORTTEXT( ch_krona_text_for_import ) + ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html ) + ch_versions = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() ) + + emit: + html = ch_krona_html + versions = ch_versions +} From ad9bc4b7e9d2ec09bcd8fd53a6ea67f86a1e2f73 Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 20 Jun 2022 09:03:47 -0500 Subject: [PATCH 17/27] Add the visualization workflow to the main workflow --- workflows/taxprofiler.nf | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index f29a366..313790c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -62,6 +62,7 @@ include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_ include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval' include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' include { PROFILING } from '../subworkflows/local/profiling' +include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -209,6 +210,12 @@ workflow TAXPROFILER { PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( PROFILING.out.versions ) + /* + SUBWORKFLOW: VISUALIZATION_KRONA + */ + VISUALIZATION_KRONA ( PROFILING.out.profiles ) + ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions ) + /* MODULE: MultiQC */ From 0e8edd5b2f097e2c7a6c1b7a39fb5a19671395ce Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 08:48:16 -0500 Subject: [PATCH 18/27] Add output configuration for Kraken Krona chart --- conf/modules.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 46320cf..f9d0329 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -293,6 +293,14 @@ process { ] } + withName: KRONA_KTIMPORTTEXT { + publishDir = [ + path: { "${params.outdir}/krona" }, + mode: params.publish_dir_mode, + pattern: '*.{html}' + ] + } + withName: METAPHLAN3 { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } From 15ac8b39fa8a2e6b490d05d69e1634f48eb35117 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 10:54:10 -0500 Subject: [PATCH 19/27] Add citation for Krona --- CITATIONS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CITATIONS.md b/CITATIONS.md index 8044658..1ce4ec2 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -36,6 +36,10 @@ > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0. +- [Krona](https://doi.org/10.1186/1471-2105-12-385) + + > Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385. + - [MALT](https://doi.org/10.1038/s41559-017-0446-6) > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. From 18284e6e32d1090336f37fd024bb5703cc80503b Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 27 Jun 2022 08:41:42 -0500 Subject: [PATCH 20/27] Add run_krona option to allow skipping Krona chart creation --- conf/test.config | 1 + conf/test_nopreprocessing.config | 1 + nextflow.config | 3 +++ nextflow_schema.json | 3 +++ workflows/taxprofiler.nf | 6 ++++-- 5 files changed, 12 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index e9fa62e..04dfb4d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_krona = true malt_save_reads = true kraken2_save_reads = true centrifuge_save_reads = true diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 60cdde8..92fd29d 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_krona = true } process { diff --git a/nextflow.config b/nextflow.config index 73fd0b3..29c87cd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -125,6 +125,9 @@ params { run_diamond = false diamond_output_format = 'tsv' // TSV is only format with taxonomic information apparently diamond_save_reads = false // this will override default diamond output format so no taxonomic profile is generated! + + // krona + run_krona = false } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 2c00348..682bf2f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -433,6 +433,9 @@ }, "diamond_save_reads": { "type": "boolean" + }, + "run_krona": { + "type": "boolean" } } } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 313790c..f7ef07a 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -213,8 +213,10 @@ workflow TAXPROFILER { /* SUBWORKFLOW: VISUALIZATION_KRONA */ - VISUALIZATION_KRONA ( PROFILING.out.profiles ) - ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions ) + if ( params.run_krona ) { + VISUALIZATION_KRONA ( PROFILING.out.profiles ) + ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions ) + } /* MODULE: MultiQC From 23fec89b062d74b2052c31d982a52d301bc2809c Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 27 Jun 2022 08:52:29 -0500 Subject: [PATCH 21/27] Add Krona chart creation for Centrifuge reports --- subworkflows/local/visualization_krona.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/subworkflows/local/visualization_krona.nf b/subworkflows/local/visualization_krona.nf index 87db112..b9f645d 100644 --- a/subworkflows/local/visualization_krona.nf +++ b/subworkflows/local/visualization_krona.nf @@ -20,6 +20,7 @@ workflow VISUALIZATION_KRONA { */ ch_input_profiles = profiles .branch { + centrifuge: it[0]['tool'] == 'centrifuge' kraken2: it[0]['tool'] == 'kraken2' unknown: true } @@ -28,6 +29,7 @@ workflow VISUALIZATION_KRONA { Convert Kraken2 formatted reports into Krona text files */ ch_kraken_reports = ch_input_profiles.kraken2 + .mix( ch_input_profiles.centrifuge ) KRAKENTOOLS_KREPORT2KRONA ( ch_kraken_reports ) ch_krona_text = ch_krona_text.mix( KRAKENTOOLS_KREPORT2KRONA.out.txt ) ch_versions = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() ) From 5b80c7cab9e65702338f2c93e1f9dcfe06ecba28 Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 27 Jun 2022 08:58:28 -0500 Subject: [PATCH 22/27] Install KAIJU_KAIJU2KRONA --- modules.json | 3 ++ .../nf-core/modules/kaiju/kaiju2krona/main.nf | 39 ++++++++++++++++ .../modules/kaiju/kaiju2krona/meta.yml | 44 +++++++++++++++++++ 3 files changed, 86 insertions(+) create mode 100644 modules/nf-core/modules/kaiju/kaiju2krona/main.nf create mode 100644 modules/nf-core/modules/kaiju/kaiju2krona/meta.yml diff --git a/modules.json b/modules.json index 4956cc8..1770a53 100644 --- a/modules.json +++ b/modules.json @@ -42,6 +42,9 @@ "kaiju/kaiju": { "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe" }, + "kaiju/kaiju2krona": { + "git_sha": "2f0b19240430de6807b1232e6d9d0e8084e8a28f" + }, "kaiju/kaiju2table": { "git_sha": "538dbac98ba9c8f799536cd5a617195501439457" }, diff --git a/modules/nf-core/modules/kaiju/kaiju2krona/main.nf b/modules/nf-core/modules/kaiju/kaiju2krona/main.nf new file mode 100644 index 0000000..c95d5a7 --- /dev/null +++ b/modules/nf-core/modules/kaiju/kaiju2krona/main.nf @@ -0,0 +1,39 @@ +process KAIJU_KAIJU2KRONA { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1': + 'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }" + + input: + tuple val(meta), path(tsv) + path(db) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + dbnodes=`find -L ${db} -name "*nodes.dmp"` + dbnames=`find -L ${db} -name "*names.dmp"` + kaiju2krona \\ + $args \\ + -t \$dbnodes \\ + -n \$dbnames \\ + -i ${tsv} \\ + -o ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml b/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml new file mode 100644 index 0000000..a0dc2fd --- /dev/null +++ b/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml @@ -0,0 +1,44 @@ +name: kaiju_kaiju2krona +description: Convert Kaiju's tab-separated output file into a tab-separated text file which can be imported into Krona. +keywords: + - taxonomy + - visualisation + - krona chart + - metagenomics +tools: + - "kaiju": + description: Fast and sensitive taxonomic classification for metagenomics + homepage: https://kaiju.binf.ku.dk/ + documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md + tool_dev_url: https://github.com/bioinformatics-centre/kaiju + doi: "10.1038/ncomms11257" + licence: ["GNU GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tsv: + type: file + description: Kaiju tab-separated output file + pattern: "*.{tsv,txt}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
From 821dd844d8e7d87db095b2dc18ddb98cd01a81b4 Mon Sep 17 00:00:00 2001
From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com>
Date: Mon, 27 Jun 2022 09:23:20 -0500
Subject: [PATCH 23/27] Add Kaiju profile conversion to visualization workflow

---
 subworkflows/local/visualization_krona.nf | 22 ++++++++++++++++++++++
 workflows/taxprofiler.nf                  |  2 +-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/subworkflows/local/visualization_krona.nf b/subworkflows/local/visualization_krona.nf
index b9f645d..31dd5bc 100644
--- a/subworkflows/local/visualization_krona.nf
+++ b/subworkflows/local/visualization_krona.nf
@@ -2,6 +2,7 @@
 // Create Krona visualizations
 //

+include { KAIJU_KAIJU2KRONA         } from '../../modules/nf-core/modules/kaiju/kaiju2krona/main'
 include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/nf-core/modules/krakentools/kreport2krona/main'
 include { KRONA_CLEANUP             } from '../../modules/local/krona_cleanup'
 include { KRONA_KTIMPORTTEXT        } from '../../modules/nf-core/modules/krona/ktimporttext/main'
@@ -9,6 +10,7 @@ include { KRONA_KTIMPORTTEXT        } from '../../modules/nf-core/modules/krona/
 workflow VISUALIZATION_KRONA {
     take:
     profiles
+    databases

     main:
     ch_krona_text = Channel.empty()
@@ -21,6 +23,7 @@ workflow VISUALIZATION_KRONA {
     ch_input_profiles = profiles
         .branch {
             centrifuge: it[0]['tool'] == 'centrifuge'
+            kaiju: it[0]['tool'] == 'kaiju'
             kraken2: it[0]['tool'] == 'kraken2'
             unknown: true
         }
@@ -34,6 +37,25 @@ workflow VISUALIZATION_KRONA {
     ch_krona_text = ch_krona_text.mix( KRAKENTOOLS_KREPORT2KRONA.out.txt )
     ch_versions = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() )

+    /*
+        Combine Kaiju profiles with their databases
+    */
+    ch_input_for_kaiju2krona = ch_input_profiles.kaiju
+        .map{ [it[0]['db_name'], it[0], it[1]] }
+        .combine( databases.map{ [it[0]['db_name'], it[1]] }, by: 0 )
+        .multiMap{
+            it ->
+                profiles: [it[1], it[2]]
+                db: it[3]
+        }
+
+    /*
+        Convert Kaiju formatted reports into Krona text files
+    */
+    KAIJU_KAIJU2KRONA( ch_input_for_kaiju2krona.profiles, ch_input_for_kaiju2krona.db )
+    ch_krona_text = ch_krona_text.mix( KAIJU_KAIJU2KRONA.out.txt )
+    ch_versions = ch_versions.mix( KAIJU_KAIJU2KRONA.out.versions.first() )
+
     /*
         Remove taxonomy level annotations from the Krona text files
     */
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index f7ef07a..79a2bfd 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -214,7 +214,7 @@ workflow TAXPROFILER {
         SUBWORKFLOW: VISUALIZATION_KRONA
     */
     if ( params.run_krona ) {
-        VISUALIZATION_KRONA ( PROFILING.out.profiles )
+        VISUALIZATION_KRONA ( PROFILING.out.profiles, DB_CHECK.out.dbs )
         ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
     }
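The map/combine/multiMap chain added above pairs every Kaiju profile with the database it was built against, using db_name as the join key, and then splits the joined tuples back into two synchronised channels (profiles and db) so KAIJU_KAIJU2KRONA receives matching inputs. The same pattern in isolation, with placeholder tuples only (none of these values come from the pipeline):

    // combine_multimap_sketch.nf -- illustrative only
    workflow {
        profiles = Channel.of(
            [ [ tool: 'kaiju', db_name: 'dbA', id: 'sample1' ], 'sample1.kaiju.tsv' ],
            [ [ tool: 'kaiju', db_name: 'dbB', id: 'sample2' ], 'sample2.kaiju.tsv' ]
        )
        databases = Channel.of(
            [ [ db_name: 'dbA' ], '/path/to/dbA' ],
            [ [ db_name: 'dbB' ], '/path/to/dbB' ]
        )

        ch_paired = profiles
            .map { [ it[0]['db_name'], it[0], it[1] ] }                      // key each profile by db_name
            .combine( databases.map { [ it[0]['db_name'], it[1] ] }, by: 0 ) // join with the matching database
            .multiMap { it ->
                profiles: [ it[1], it[2] ]   // [ meta, profile ]
                db: it[3]                    // database path, emitted in matching order
            }

        ch_paired.profiles.view()
        ch_paired.db.view()
    }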
"mimetype": "text/csv", + "format": "file-path", "default": "None" }, "shortread_qc_excludeunmerged": { From 4de43040a3403dae9b85679cdda9719ff2bb7117 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 27 Jun 2022 12:12:16 -0500 Subject: [PATCH 25/27] Switch to using raw classifications for Kaiju2Krona --- subworkflows/local/profiling.nf | 25 ++++++++++++++--------- subworkflows/local/visualization_krona.nf | 9 ++++++-- workflows/taxprofiler.nf | 2 +- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index de5bea1..45c49f5 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -21,7 +21,8 @@ workflow PROFILING { main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - ch_raw_profiles = Channel.empty() + ch_raw_classifications = Channel.empty() + ch_raw_profiles = Channel.empty() /* COMBINE READS WITH POSSIBLE DATABASES @@ -110,6 +111,7 @@ workflow PROFILING { MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary ) ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan ) ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt ) } @@ -124,9 +126,10 @@ workflow PROFILING { } KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification ) - ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) - ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment ) + ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report ) } @@ -145,8 +148,9 @@ workflow PROFILING { CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads ) CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db) - ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) - ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport ) + ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results ) + ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport ) } @@ -182,6 +186,7 @@ workflow PROFILING { KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name) ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results ) ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary ) } @@ -206,8 +211,8 @@ workflow PROFILING { } emit: - profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom - versions = 
From 069370db8610aedd557438b0f071771cf97832fd Mon Sep 17 00:00:00 2001
From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com>
Date: Mon, 27 Jun 2022 13:39:42 -0500
Subject: [PATCH 26/27] Add unclassified and verbose flags to kaiju2krona

---
 conf/modules.config | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/conf/modules.config b/conf/modules.config
index f9d0329..6563cc9 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -351,6 +351,10 @@ process {
         ]
     }

+    withName: KAIJU_KAIJU2KRONA {
+        ext.args = '-v -u'
+    }
+
    withName: DIAMOND_BLASTX {
        ext.args = { "${meta.db_params}" }
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
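As the commit subject says, the extra kaiju2krona flags change what ends up in the Krona text: -u keeps unclassified reads in the output and -v switches on kaiju2krona's verbose mode. Because they are wired in via ext.args, a user who wants different behaviour can override them from a custom config rather than patching the pipeline; a hypothetical override (file name and flag choice are examples only):

    // custom.config -- hypothetical user config, supplied at runtime with -c custom.config
    process {
        withName: KAIJU_KAIJU2KRONA {
            ext.args = '-u'   // keep unclassified reads, but drop the verbose flag
        }
    }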
From 6673ccf8d8bb0883d5b066e16ac59293830a80c1 Mon Sep 17 00:00:00 2001
From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com>
Date: Tue, 28 Jun 2022 13:13:37 +0000
Subject: [PATCH 27/27] Update channel assignment alignment in profiling subworkflow

Co-authored-by: James A. Fellows Yates
---
 subworkflows/local/profiling.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf
index 45c49f5..c10ef5b 100644
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@@ -109,10 +109,10 @@ workflow PROFILING {
         }

         MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary )
-        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
-        ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
+        ch_multiqc_files       = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
+        ch_versions            = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
         ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan )
-        ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
+        ch_raw_profiles        = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )

     }
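Taken together, these patches make Krona chart generation an opt-in step: nothing extra runs unless run_krona is enabled alongside at least one profiler whose output the visualisation subworkflow can currently convert (Kraken2, Centrifuge, or Kaiju). A hypothetical params snippet enabling it for a run is sketched below; the database sheet path and profiler selection are placeholders, not recommended settings.

    // krona.config -- hypothetical example, supplied with -c krona.config
    params {
        databases   = 'databases.csv'
        run_kraken2 = true
        run_kaiju   = true
        run_krona   = true
    }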