From 4352d35937560e977904f1e9154691b76aff41f8 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 14 Oct 2022 10:52:17 +0200 Subject: [PATCH 01/23] chore: install Bracken module --- modules.json | 4 +++ modules/nf-core/bracken/bracken/main.nf | 45 ++++++++++++++++++++++++ modules/nf-core/bracken/bracken/meta.yml | 45 ++++++++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 modules/nf-core/bracken/bracken/main.nf create mode 100644 modules/nf-core/bracken/bracken/meta.yml diff --git a/modules.json b/modules.json index 10d6c74..8f89a11 100644 --- a/modules.json +++ b/modules.json @@ -21,6 +21,10 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" }, + "bracken/bracken": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, "cat/fastq": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" diff --git a/modules/nf-core/bracken/bracken/main.nf b/modules/nf-core/bracken/bracken/main.nf new file mode 100644 index 0000000..5e08418 --- /dev/null +++ b/modules/nf-core/bracken/bracken/main.nf @@ -0,0 +1,45 @@ +process BRACKEN_BRACKEN { + tag "$meta.id" + label 'process_low' + + // WARN: Version information not provided by tool on CLI. + // Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::bracken=2.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bracken:2.7--py39hc16433a_0': + 'quay.io/biocontainers/bracken:2.7--py39hc16433a_0' }" + + input: + tuple val(meta), path(kraken_report) + path database + + output: + tuple val(meta), path(bracken_report), emit: reports + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def threshold = meta.threshold ?: 10 + def taxonomic_level = meta.taxonomic_level ?: 'S' + def read_length = meta.read_length ?: 150 + def args = task.ext.args ?: "-l ${taxonomic_level} -t ${threshold} -r ${read_length}" + def prefix = task.ext.prefix ?: "${meta.id}" + bracken_report = "${prefix}_${taxonomic_level}.tsv" + // WARN: Version information not provided by tool on CLI. + // Please update version string below when bumping container versions. + def VERSION = '2.7' + """ + bracken \\ + ${args} \\ + -d '${database}' \\ + -i '${kraken_report}' \\ + -o '${bracken_report}' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bracken: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/bracken/bracken/meta.yml b/modules/nf-core/bracken/bracken/meta.yml new file mode 100644 index 0000000..1931161 --- /dev/null +++ b/modules/nf-core/bracken/bracken/meta.yml @@ -0,0 +1,45 @@ +name: bracken_bracken +description: Re-estimate taxonomic abundance of metagenomic samples analyzed by kraken. +keywords: + - sort +tools: + - bracken: + description: Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample. + homepage: https://ccb.jhu.edu/software/bracken/ + documentation: https://ccb.jhu.edu/software/bracken/index.shtml?t=manual + tool_dev_url: https://github.com/jenniferlu717/Bracken + doi: "10.7717/peerj-cs.104" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - kraken_report: + type: file + description: TSV file with six columns coming from kraken2 output + pattern: "*.{tsv}" + - database: + type: file + description: Directory containing the kraken2/Bracken files for analysis + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reports: + type: file + description: TSV output report of the re-estimated abundances + pattern: "*.{tsv}" + +authors: + - "@Midnighter" From 4f728648f59c35f7282e832090302047c7786f2a Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 14 Oct 2022 10:56:14 +0200 Subject: [PATCH 02/23] feat: add process to standardize kraken report --- modules/local/kraken_standard_report.nf | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 modules/local/kraken_standard_report.nf diff --git a/modules/local/kraken_standard_report.nf b/modules/local/kraken_standard_report.nf new file mode 100644 index 0000000..93109aa --- /dev/null +++ b/modules/local/kraken_standard_report.nf @@ -0,0 +1,24 @@ +process KRAKEN_STANDARD_REPORT { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 'conda-forge::sed=4.8' : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' + } else { + container 'biocontainers/biocontainers:v1.2.0_cv2' + } + + input: + tuple val(meta), path(report) + + output: + tuple val(meta), path(result), emit: report + + script: + result = "${report.baseName}_standardized.kraken2.report.txt" + """ + cut -f1-3,6-8 "${report}" > "${result}" + """ +} + From ca38d002dee9510ca687403a617aec744ab9d46d Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 14 Oct 2022 12:18:07 +0200 Subject: [PATCH 03/23] feat: integrate Bracken into the profiling pipeline --- conf/modules.config | 11 +++++++++- modules/local/kraken_standard_report.nf | 18 +++++++++------- nextflow.config | 4 ++++ subworkflows/local/profiling.nf | 28 +++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d2a0051..fff28ea 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -277,7 +277,7 @@ process { } withName: KRAKEN2_KRAKEN2 { - ext.args = { "${meta.db_params}" } + ext.args = params.kraken2_save_minimizers ? { "${meta.db_params} --report-minimizer-data" } : { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}/" }, @@ -286,6 +286,15 @@ process { ] } + withName: BRACKEN_BRACKEN { + errorStrategy = 'ignore' + publishDir = [ + path: { "${params.outdir}/bracken/${meta.db_name}/" }, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] + } + withName: KRAKENTOOLS_COMBINEKREPORTS { ext.prefix = { "kraken2_${meta.id}_combined_reports" } publishDir = [ diff --git a/modules/local/kraken_standard_report.nf b/modules/local/kraken_standard_report.nf index 93109aa..ab47d81 100644 --- a/modules/local/kraken_standard_report.nf +++ b/modules/local/kraken_standard_report.nf @@ -1,13 +1,11 @@ process KRAKEN_STANDARD_REPORT { tag "$meta.id" - label 'process_low' + label 'process_single' conda (params.enable_conda ? 'conda-forge::sed=4.8' : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' - } else { - container 'biocontainers/biocontainers:v1.2.0_cv2' - } + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' : + 'biocontainers/biocontainers:v1.2.0_cv2' }" input: tuple val(meta), path(report) @@ -15,10 +13,14 @@ process KRAKEN_STANDARD_REPORT { output: tuple val(meta), path(result), emit: report + when: + task.ext.when == null || task.ext.when + script: - result = "${report.baseName}_standardized.kraken2.report.txt" + def prefix = task.ext.prefix ?: "${meta.id}" + result = "${prefix}_standardized.kraken2.report.txt" """ - cut -f1-3,6-8 "${report}" > "${result}" + cut -f1-3,6-8 '${report}' > '${result}' """ } diff --git a/nextflow.config b/nextflow.config index efb5aff..4f32a00 100644 --- a/nextflow.config +++ b/nextflow.config @@ -114,6 +114,10 @@ params { run_kraken2 = false kraken2_save_reads = false // added directly to module in profiling.nf kraken2_save_readclassification = false // added directly to module in profiling.nf + kraken2_save_minimizers = false + + // Bracken + run_bracken = true // centrifuge run_centrifuge = false diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 11c4a72..970c128 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -5,6 +5,7 @@ include { MALT_RUN } from '../../modules/nf-core/malt/run/main' include { MEGAN_RMA2INFO as MEGAN_RMA2INFO_TSV } from '../../modules/nf-core/megan/rma2info/main' include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main' +include { BRACKEN_BRACKEN } from '../../modules/nf-core/bracken/bracken/main' include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/centrifuge/centrifuge/main' include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main' include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/metaphlan3/metaphlan3/main' @@ -133,6 +134,33 @@ workflow PROFILING { } + if ( params.run_kraken2 && params.run_bracken ) { + + def ch_input_for_bracken + + if (params.kraken2_save_minimizers) { + ch_input_for_bracken = KRAKEN_STANDARD_REPORT(KRAKEN2_KRAKEN2.out.report).report + } else { + ch_input_for_bracken = KRAKEN2_KRAKEN2.out.report + } + + ch_input_for_bracken = ch_input_for_bracken + .combine( + databases.filter { meta, db -> + meta['tool'] == 'bracken' + } + ) + .multiMap { meta, report, db_meta, db -> + report: [meta + db_meta, report] + db: db + } + + BRACKEN_BRACKEN(ch_input_for_bracken.report, ch_input_for_bracken.db) + ch_versions = ch_versions.mix(BRACKEN_BRACKEN.out.versions.first()) + ch_raw_profiles = ch_raw_profiles.mix(BRACKEN_BRACKEN.out.reports) + + } + if ( params.run_centrifuge ) { ch_input_for_centrifuge = ch_input_for_profiling.centrifuge From 767663d975040baed045d7dcc49a290e292b54a8 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 14 Oct 2022 12:18:55 +0200 Subject: [PATCH 04/23] chore: add Bracken to test configs --- conf/test.config | 1 + conf/test_nopreprocessing.config | 1 + conf/test_noprofiling.config | 1 + conf/test_nothing.config | 1 + conf/test_pep.config | 1 + 5 files changed, 5 insertions(+) diff --git a/conf/test.config b/conf/test.config index d5dcd67..777d9bf 100644 --- a/conf/test.config +++ b/conf/test.config @@ -34,6 +34,7 @@ params { hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true + run_bracken = true run_malt = true run_metaphlan3 = true run_centrifuge = true diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 3908b56..357f76f 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -33,6 +33,7 @@ params { hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true + run_bracken = true run_malt = true run_metaphlan3 = true run_centrifuge = true diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index 12c7185..59ed0da 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -34,6 +34,7 @@ params { hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false + run_bracken = false run_malt = false run_metaphlan3 = false run_centrifuge = false diff --git a/conf/test_nothing.config b/conf/test_nothing.config index c0ecece..df09613 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -33,6 +33,7 @@ params { hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false + run_bracken = false run_malt = false run_metaphlan3 = false run_centrifuge = false diff --git a/conf/test_pep.config b/conf/test_pep.config index 7f8c95d..762ebb3 100644 --- a/conf/test_pep.config +++ b/conf/test_pep.config @@ -19,6 +19,7 @@ params { hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true + run_bracken = true run_malt = true run_metaphlan3 = true run_centrifuge = true From 7f8180f45841cc3ffb40ca70c564a8d14ed254f7 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 14 Oct 2022 12:49:07 +0200 Subject: [PATCH 05/23] fix: emit tool versions --- modules/local/kraken_standard_report.nf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/local/kraken_standard_report.nf b/modules/local/kraken_standard_report.nf index ab47d81..aada6fb 100644 --- a/modules/local/kraken_standard_report.nf +++ b/modules/local/kraken_standard_report.nf @@ -12,6 +12,7 @@ process KRAKEN_STANDARD_REPORT { output: tuple val(meta), path(result), emit: report + path 'versions.yml' , emit: versions when: task.ext.when == null || task.ext.when @@ -21,6 +22,11 @@ process KRAKEN_STANDARD_REPORT { result = "${prefix}_standardized.kraken2.report.txt" """ cut -f1-3,6-8 '${report}' > '${result}' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cut: \$(echo \$(cut --version 2>&1) | sed 's/^.*(GNU coreutils) //; s/ Copyright.*\$//') + END_VERSIONS """ } From c4f64682751383b6144d14d05cfa48e2d10b6235 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 14 Oct 2022 12:55:26 +0200 Subject: [PATCH 06/23] chore: update parameters schema --- nextflow_schema.json | 61 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index f88443f..c40ce0e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,11 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir", "databases"], + "required": [ + "input", + "databases", + "outdir" + ], "properties": { "input": { "type": "string", @@ -80,7 +84,10 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"], + "enum": [ + "fastp", + "adapterremoval" + ], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -133,7 +140,11 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": ["bbduk", "prinseqplusplus", "fastp"], + "enum": [ + "bbduk", + "prinseqplusplus", + "fastp" + ], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -167,7 +178,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"], + "enum": [ + "entropy", + "dust" + ], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -341,7 +355,15 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], + "enum": [ + "blast", + "xml", + "txt", + "daa", + "sam", + "tsv", + "paf" + ], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -360,7 +382,14 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": ["phylum", "class", "order", "family", "genus", "species"], + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -382,6 +411,17 @@ "description": "Turn on saving of Kraken2 per-read taxonomic assignment file", "help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on specific taxonomic taxonomic assignment that that read recieved.\n\n> Modifies tool parameter(s):\n> - kraken2: `--output`" }, + "kraken2_save_minimizers": { + "type": "boolean", + "description": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.", + "fa_icon": "fas fa-save", + "help_text": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.\n\nAdds `--report-minimizer-data` to the kraken2 command." + }, + "run_bracken": { + "type": "boolean", + "description": "Post-process kraken2 reports with Bracken.", + "fa_icon": "fas fa-toggle-on" + }, "run_malt": { "type": "boolean", "fa_icon": "fas fa-toggle-on", @@ -555,7 +595,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { From 9dc819ceff9db68b214573062f07a7dad60ef593 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 14 Oct 2022 13:16:54 +0200 Subject: [PATCH 07/23] style: reformat schema --- nextflow_schema.json | 50 +++++++------------------------------------- 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index c40ce0e..97a03b5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,11 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "databases", - "outdir" - ], + "required": ["input", "databases", "outdir"], "properties": { "input": { "type": "string", @@ -84,10 +80,7 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ], + "enum": ["fastp", "adapterremoval"], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -140,11 +133,7 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": [ - "bbduk", - "prinseqplusplus", - "fastp" - ], + "enum": ["bbduk", "prinseqplusplus", "fastp"], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -178,10 +167,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ], + "enum": ["entropy", "dust"], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -355,15 +341,7 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": [ - "blast", - "xml", - "txt", - "daa", - "sam", - "tsv", - "paf" - ], + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -382,14 +360,7 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ], + "enum": ["phylum", "class", "order", "family", "genus", "species"], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -595,14 +566,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { From 95621ebd8ad4b89e2cdad0594f64125d07ac3f66 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 14 Oct 2022 14:47:08 +0200 Subject: [PATCH 08/23] fix: include local module --- subworkflows/local/profiling.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 970c128..4d49ce2 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -5,6 +5,7 @@ include { MALT_RUN } from '../../modules/nf-core/malt/run/main' include { MEGAN_RMA2INFO as MEGAN_RMA2INFO_TSV } from '../../modules/nf-core/megan/rma2info/main' include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main' +include { KRAKEN_STANDARD_REPORT } from '../../modules/local/kraken_standard_report' include { BRACKEN_BRACKEN } from '../../modules/nf-core/bracken/bracken/main' include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/centrifuge/centrifuge/main' include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main' From 563b34c9c38535a11a3c14d488fcd97519972d81 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 14 Oct 2022 14:48:27 +0200 Subject: [PATCH 09/23] refactor: rename process to kraken2 --- .../{kraken_standard_report.nf => kraken2_standard_report.nf} | 2 +- subworkflows/local/profiling.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename modules/local/{kraken_standard_report.nf => kraken2_standard_report.nf} (96%) diff --git a/modules/local/kraken_standard_report.nf b/modules/local/kraken2_standard_report.nf similarity index 96% rename from modules/local/kraken_standard_report.nf rename to modules/local/kraken2_standard_report.nf index aada6fb..09a98c1 100644 --- a/modules/local/kraken_standard_report.nf +++ b/modules/local/kraken2_standard_report.nf @@ -1,4 +1,4 @@ -process KRAKEN_STANDARD_REPORT { +process KRAKEN2_STANDARD_REPORT { tag "$meta.id" label 'process_single' diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 4d49ce2..0d46e0c 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -5,7 +5,7 @@ include { MALT_RUN } from '../../modules/nf-core/malt/run/main' include { MEGAN_RMA2INFO as MEGAN_RMA2INFO_TSV } from '../../modules/nf-core/megan/rma2info/main' include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main' -include { KRAKEN_STANDARD_REPORT } from '../../modules/local/kraken_standard_report' +include { KRAKEN2_STANDARD_REPORT } from '../../modules/local/kraken2_standard_report' include { BRACKEN_BRACKEN } from '../../modules/nf-core/bracken/bracken/main' include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/centrifuge/centrifuge/main' include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main' @@ -140,7 +140,7 @@ workflow PROFILING { def ch_input_for_bracken if (params.kraken2_save_minimizers) { - ch_input_for_bracken = KRAKEN_STANDARD_REPORT(KRAKEN2_KRAKEN2.out.report).report + ch_input_for_bracken = KRAKEN2_STANDARD_REPORT(KRAKEN2_KRAKEN2.out.report).report } else { ch_input_for_bracken = KRAKEN2_KRAKEN2.out.report } From fd8a0df2a68fb17a0422b281458d1267b824d004 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Mon, 17 Oct 2022 17:16:21 +0200 Subject: [PATCH 10/23] refactor: add warning when run_kraken2 is missing --- workflows/taxprofiler.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 8b9edb7..f6dc3ed 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -41,6 +41,7 @@ if (params.longread_hostremoval_index ) { ch_longread_reference_index = fi if (params.diamond_save_reads ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads supplied, only aligned reads in SAM format will be produced, no taxonomic profiles will be available." if (params.run_malt && params.run_krona && !params.krona_taxonomy_directory) log.warn "[nf-core/taxprofiler] Krona can only be run on MALT output if path to Krona taxonomy database supplied to --krona_taxonomy_directory. Krona will not be executed in this run for MALT." +if (params.run_bracken && !params.run_kraken2) log.warn '[nf-core/taxprofiler] You are attempting to run Bracken without running kraken2. This is not possible! Please set --run_kraken2 as well.' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 48b00b7d624d9d37a4b891a578ea23e805bf6543 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Mon, 17 Oct 2022 17:33:24 +0200 Subject: [PATCH 11/23] refactor: create standardized Bracken reports --- .../local/standardisation_profiles.nf | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index cbb0fab..f560424 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -3,8 +3,11 @@ // include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main' -include { KRAKENTOOLS_COMBINEKREPORTS } from '../../modules/nf-core/krakentools/combinekreports/main' -include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main' +include { + KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE; + KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_BRACKEN; + KRAKENTOOLS_COMBINEKREPORTS +} from '../../modules/nf-core/krakentools/combinekreports/main' include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main' include { MOTUS_MERGE } from '../../modules/nf-core/motus/merge/main' @@ -27,6 +30,7 @@ workflow STANDARDISATION_PROFILES { .branch { motus: it[0]['tool'] == 'motus' kraken2: it[0]['tool'] == 'kraken2' + bracken: it[0]['tool'] == 'bracken' centrifuge: it[0]['tool'] == 'centrifuge' metaphlan3: it[0]['tool'] == 'metaphlan3' unknown: true @@ -98,6 +102,23 @@ workflow STANDARDISATION_PROFILES { ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt ) ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS.out.versions ) + // Bracken + + // Collect and replace id for db_name for prefix + // Have to sort by size to ensure first file actually has hits otherwise + // the script fails + ch_profiles_for_bracken = ch_input_profiles.bracken + .map { [it[0]['db_name'], it[1]] } + .groupTuple(sort: {-it.size()} ) + .map { + [[id:it[0]], it[1]] + } + + KRAKENTOOLS_COMBINEKREPORTS_BRACKEN ( ch_profiles_for_bracken ) + ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_BRACKEN.out.txt ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_BRACKEN.out.txt ) + ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_BRACKEN.out.versions ) + // MetaPhlAn3 ch_profiles_for_metaphlan3 = ch_input_profiles.metaphlan3 From 8768a16d63231e07daf3d8cf8fbf1afd930b2a1c Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Mon, 17 Oct 2022 17:35:41 +0200 Subject: [PATCH 12/23] refactor: turn warning into error --- workflows/taxprofiler.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index f6dc3ed..12ee19f 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -41,7 +41,7 @@ if (params.longread_hostremoval_index ) { ch_longread_reference_index = fi if (params.diamond_save_reads ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads supplied, only aligned reads in SAM format will be produced, no taxonomic profiles will be available." if (params.run_malt && params.run_krona && !params.krona_taxonomy_directory) log.warn "[nf-core/taxprofiler] Krona can only be run on MALT output if path to Krona taxonomy database supplied to --krona_taxonomy_directory. Krona will not be executed in this run for MALT." -if (params.run_bracken && !params.run_kraken2) log.warn '[nf-core/taxprofiler] You are attempting to run Bracken without running kraken2. This is not possible! Please set --run_kraken2 as well.' +if (params.run_bracken && !params.run_kraken2) exit 1, 'ERROR: [nf-core/taxprofiler] You are attempting to run Bracken without running kraken2. This is not possible! Please set --run_kraken2 as well.' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From c7c7a7f6783646a492bb31855d2d47fb36da37f8 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Wed, 19 Oct 2022 16:20:09 +0200 Subject: [PATCH 13/23] refactor: change Bracken prefix to be more specific --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config b/conf/modules.config index fff28ea..7ec9210 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -288,6 +288,7 @@ process { withName: BRACKEN_BRACKEN { errorStrategy = 'ignore' + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/bracken/${meta.db_name}/" }, mode: params.publish_dir_mode, From b73b4798600d952575b62a63964b61143b54a438 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Wed, 19 Oct 2022 16:21:36 +0200 Subject: [PATCH 14/23] refactor: handle combined kraken2+bracken tool column --- subworkflows/local/profiling.nf | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 0d46e0c..5e2a7d2 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -41,7 +41,7 @@ workflow PROFILING { .combine(databases) .branch { malt: it[2]['tool'] == 'malt' - kraken2: it[2]['tool'] == 'kraken2' + kraken2: it[2]['tool'].contains('kraken2') metaphlan3: it[2]['tool'] == 'metaphlan3' centrifuge: it[2]['tool'] == 'centrifuge' kaiju: it[2]['tool'] == 'kaiju' @@ -131,34 +131,41 @@ workflow PROFILING { ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report ) ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment ) - ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report ) + ch_raw_profiles = ch_raw_profiles.mix( + KRAKEN2_KRAKEN2.out.report + // Set the tool to be strictly 'kraken2' instead of potentially 'kraken2+bracken' for downstream use. + .map { meta, report -> [meta + [tool: 'kraken2'], report]} + ) } if ( params.run_kraken2 && params.run_bracken ) { - def ch_input_for_bracken + def ch_input_for_bracken = KRAKEN2_KRAKEN2.out.report + .filter { meta, report -> meta['tool'].contains('bracken') } if (params.kraken2_save_minimizers) { - ch_input_for_bracken = KRAKEN2_STANDARD_REPORT(KRAKEN2_KRAKEN2.out.report).report - } else { - ch_input_for_bracken = KRAKEN2_KRAKEN2.out.report + ch_input_for_bracken = KRAKEN2_STANDARD_REPORT(ch_input_for_bracken).report } ch_input_for_bracken = ch_input_for_bracken .combine( databases.filter { meta, db -> - meta['tool'] == 'bracken' + meta['tool'].contains('bracken') } ) .multiMap { meta, report, db_meta, db -> - report: [meta + db_meta, report] + report: [meta, report] db: db } BRACKEN_BRACKEN(ch_input_for_bracken.report, ch_input_for_bracken.db) ch_versions = ch_versions.mix(BRACKEN_BRACKEN.out.versions.first()) - ch_raw_profiles = ch_raw_profiles.mix(BRACKEN_BRACKEN.out.reports) + ch_raw_profiles = ch_raw_profiles.mix( + BRACKEN_BRACKEN.out.reports + // Set the tool to be strictly 'bracken' instead of potentially 'kraken2+bracken' for downstream use. + .map { meta, report -> [meta + [tool: 'bracken'], report]} + ) } From 8deb7757d8a260fb2bc4c5890a10995181d1bafe Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Wed, 19 Oct 2022 16:21:57 +0200 Subject: [PATCH 15/23] fix: remove Bracken from standardise reports --- .../local/standardisation_profiles.nf | 25 ++----------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index f560424..cbb0fab 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -3,11 +3,8 @@ // include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main' -include { - KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE; - KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_BRACKEN; - KRAKENTOOLS_COMBINEKREPORTS -} from '../../modules/nf-core/krakentools/combinekreports/main' +include { KRAKENTOOLS_COMBINEKREPORTS } from '../../modules/nf-core/krakentools/combinekreports/main' +include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main' include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main' include { MOTUS_MERGE } from '../../modules/nf-core/motus/merge/main' @@ -30,7 +27,6 @@ workflow STANDARDISATION_PROFILES { .branch { motus: it[0]['tool'] == 'motus' kraken2: it[0]['tool'] == 'kraken2' - bracken: it[0]['tool'] == 'bracken' centrifuge: it[0]['tool'] == 'centrifuge' metaphlan3: it[0]['tool'] == 'metaphlan3' unknown: true @@ -102,23 +98,6 @@ workflow STANDARDISATION_PROFILES { ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt ) ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS.out.versions ) - // Bracken - - // Collect and replace id for db_name for prefix - // Have to sort by size to ensure first file actually has hits otherwise - // the script fails - ch_profiles_for_bracken = ch_input_profiles.bracken - .map { [it[0]['db_name'], it[1]] } - .groupTuple(sort: {-it.size()} ) - .map { - [[id:it[0]], it[1]] - } - - KRAKENTOOLS_COMBINEKREPORTS_BRACKEN ( ch_profiles_for_bracken ) - ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_BRACKEN.out.txt ) - ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_BRACKEN.out.txt ) - ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_BRACKEN.out.versions ) - // MetaPhlAn3 ch_profiles_for_metaphlan3 = ch_input_profiles.metaphlan3 From 385507ec6fbb84fa2c3e09ba90365abdf98aac18 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 21 Oct 2022 10:03:36 +0200 Subject: [PATCH 16/23] refactor: use tool name 'bracken' only --- subworkflows/local/profiling.nf | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 5e2a7d2..e76db6c 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -41,7 +41,7 @@ workflow PROFILING { .combine(databases) .branch { malt: it[2]['tool'] == 'malt' - kraken2: it[2]['tool'].contains('kraken2') + kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' metaphlan3: it[2]['tool'] == 'metaphlan3' centrifuge: it[2]['tool'] == 'centrifuge' kaiju: it[2]['tool'] == 'kaiju' @@ -133,7 +133,7 @@ workflow PROFILING { ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment ) ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report - // Set the tool to be strictly 'kraken2' instead of potentially 'kraken2+bracken' for downstream use. + // Set the tool to be strictly 'kraken2' instead of potentially 'bracken' for downstream use. .map { meta, report -> [meta + [tool: 'kraken2'], report]} ) @@ -142,7 +142,7 @@ workflow PROFILING { if ( params.run_kraken2 && params.run_bracken ) { def ch_input_for_bracken = KRAKEN2_KRAKEN2.out.report - .filter { meta, report -> meta['tool'].contains('bracken') } + .filter { meta, report -> meta['tool'] == 'bracken' } if (params.kraken2_save_minimizers) { ch_input_for_bracken = KRAKEN2_STANDARD_REPORT(ch_input_for_bracken).report @@ -151,21 +151,17 @@ workflow PROFILING { ch_input_for_bracken = ch_input_for_bracken .combine( databases.filter { meta, db -> - meta['tool'].contains('bracken') + meta['tool'] == 'bracken' } ) .multiMap { meta, report, db_meta, db -> - report: [meta, report] + report: [meta + db_meta, report] db: db } BRACKEN_BRACKEN(ch_input_for_bracken.report, ch_input_for_bracken.db) ch_versions = ch_versions.mix(BRACKEN_BRACKEN.out.versions.first()) - ch_raw_profiles = ch_raw_profiles.mix( - BRACKEN_BRACKEN.out.reports - // Set the tool to be strictly 'bracken' instead of potentially 'kraken2+bracken' for downstream use. - .map { meta, report -> [meta + [tool: 'bracken'], report]} - ) + ch_raw_profiles = ch_raw_profiles.mix(BRACKEN_BRACKEN.out.reports) } From 7bd6112133ff3ff67772728243db29b5cabf4607 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 21 Oct 2022 10:18:11 +0200 Subject: [PATCH 17/23] docs: describe Bracken database usage --- docs/usage.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 4f4c1ac..dd80eda 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -74,13 +74,13 @@ The pipeline takes the locations and specific profiling parameters of the tool o > ⚠️ nf-core/taxprofiler does not provide any databases by default, nor does it currently generate them for you. This must be performed manually by the user. See below for more information of the expected database files. -An example database sheet can look as follows, where 4 tools are being used, and `malt` and `kraken2` will be used against two databases each. +An example database sheet can look as follows, where 4 tools are being used, and `malt` and `kraken2` will be used against two databases each. This is because specifying `bracken` implies first running `kraken2` on the same database. ```console tool,db_name,db_params,db_path malt,malt85,-id 85,///malt/testdb-malt/ malt,malt95,-id 90,///malt/testdb-malt.tar.gz -kraken2,db1,,///kraken2/testdb-kraken2.tar.gz +bracken,db1,,///bracken/testdb-bracken.tar.gz kraken2,db2,--quick,///kraken2/testdb-kraken2.tar.gz centrifuge,db1,,///centrifuge/minigut_cf.tar.gz metaphlan3,db1,,///metaphlan3/metaphlan_database/ @@ -91,8 +91,8 @@ Column specifications are as follows: | Column | Description | | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `tool` | Taxonomic profiling tool (supported by nf-core/taxprofiler) that the database has been indexed for [required]. | -| `db_name` | A unique name of the particular database [required]. | +| `tool` | Taxonomic profiling tool (supported by nf-core/taxprofiler) that the database has been indexed for [required]. Please note that `bracken` also implies running `kraken2` on the same database. | +| `db_name` | A unique name per tool for the particular database [required]. Please note that names need to be unique across both `kraken2` and `bracken` as well. | | `db_params` | Any parameters of the given taxonomic profiler that you wish to specify that the taxonomic profiling tool should use when profiling against this specific. Can be empty to use taxonomic profiler defaults. Must not be surrounded by quotes [required]. We generally do not recommend specifying parameters here that turn on/off saving of output files or specifying particular file extensions - this should be already addressed via pipeline parameters. | | `db_path` | Path to the database. Can either be a path to a directory containing the database index files or a `.tar.gz` file which contains the compressed database directory with the same name as the tar archive, minus `.tar.gz` [required]. | @@ -116,6 +116,15 @@ Expected (uncompressed) database files for each tool are as follows: - `opts.k2d` - `hash.k2d` - `taxo.k2d` +- **Bracken** output of a combined `kraken2-` and `bracken-build` process. Please see the [documentation on Bracken](https://github.com/jenniferlu717/Bracken#running-bracken-easy-version) for details. The output is a directory containing files per expected sequencing read length similarly to: + - `hash.k2d` + - `opts.k2d` + - `taxo.k2d` + - `database.kraken` + - `database100mers.kmer_distrib` + - `database100mers.kraken` + - `database150mers.kmer_distrib` + - `database150mers.kraken` - **Centrifuge** output of `centrifuge-build`. A directory containing: - `..cf` - `..cf` From 60e369aa5c7601a46e2ea2d6dddbe457f37b9f42 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 21 Oct 2022 10:47:51 +0200 Subject: [PATCH 18/23] fix: set default run bracken to false --- conf/test_motus.config | 1 + nextflow.config | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/test_motus.config b/conf/test_motus.config index d5eb8f8..d167b94 100644 --- a/conf/test_motus.config +++ b/conf/test_motus.config @@ -33,6 +33,7 @@ params { hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false + run_bracken = false run_malt = false run_metaphlan3 = false run_centrifuge = false diff --git a/nextflow.config b/nextflow.config index 4f32a00..24ccab3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -117,7 +117,7 @@ params { kraken2_save_minimizers = false // Bracken - run_bracken = true + run_bracken = false // centrifuge run_centrifuge = false From ca7dc308cb89d6e8ba110de5c96ce1641ff8940a Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 21 Oct 2022 16:36:35 +0200 Subject: [PATCH 19/23] Apply suggestions from code review Co-authored-by: James A. Fellows Yates --- docs/usage.md | 2 +- subworkflows/local/profiling.nf | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index dd80eda..56f9123 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -92,7 +92,7 @@ Column specifications are as follows: | Column | Description | | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `tool` | Taxonomic profiling tool (supported by nf-core/taxprofiler) that the database has been indexed for [required]. Please note that `bracken` also implies running `kraken2` on the same database. | -| `db_name` | A unique name per tool for the particular database [required]. Please note that names need to be unique across both `kraken2` and `bracken` as well. | +| `db_name` | A unique name per tool for the particular database [required]. Please note that names need to be unique across both `kraken2` and `bracken` as well, even if re-using the same database. | | `db_params` | Any parameters of the given taxonomic profiler that you wish to specify that the taxonomic profiling tool should use when profiling against this specific. Can be empty to use taxonomic profiler defaults. Must not be surrounded by quotes [required]. We generally do not recommend specifying parameters here that turn on/off saving of output files or specifying particular file extensions - this should be already addressed via pipeline parameters. | | `db_path` | Path to the database. Can either be a path to a directory containing the database index files or a `.tar.gz` file which contains the compressed database directory with the same name as the tar archive, minus `.tar.gz` [required]. | diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index e76db6c..e6272ac 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -41,7 +41,7 @@ workflow PROFILING { .combine(databases) .branch { malt: it[2]['tool'] == 'malt' - kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' + kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken metaphlan3: it[2]['tool'] == 'metaphlan3' centrifuge: it[2]['tool'] == 'centrifuge' kaiju: it[2]['tool'] == 'kaiju' @@ -134,6 +134,7 @@ workflow PROFILING { ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report // Set the tool to be strictly 'kraken2' instead of potentially 'bracken' for downstream use. + // Will remain distinct from 'pure' Kraken2 results due to distinct database names in file names. .map { meta, report -> [meta + [tool: 'kraken2'], report]} ) @@ -141,6 +142,7 @@ workflow PROFILING { if ( params.run_kraken2 && params.run_bracken ) { + // remove files from 'pure' kraken2 runs, so only those aligned against bracken2 kraken database are taken for brakcen def ch_input_for_bracken = KRAKEN2_KRAKEN2.out.report .filter { meta, report -> meta['tool'] == 'bracken' } From 00038db6345d0492bc0a1a4ce0dacdf86dbd836e Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 25 Oct 2022 08:51:58 +0000 Subject: [PATCH 20/23] [automated] Fix linting with Prettier --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 56f9123..ada5a2d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -92,7 +92,7 @@ Column specifications are as follows: | Column | Description | | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `tool` | Taxonomic profiling tool (supported by nf-core/taxprofiler) that the database has been indexed for [required]. Please note that `bracken` also implies running `kraken2` on the same database. | -| `db_name` | A unique name per tool for the particular database [required]. Please note that names need to be unique across both `kraken2` and `bracken` as well, even if re-using the same database. | +| `db_name` | A unique name per tool for the particular database [required]. Please note that names need to be unique across both `kraken2` and `bracken` as well, even if re-using the same database. | | `db_params` | Any parameters of the given taxonomic profiler that you wish to specify that the taxonomic profiling tool should use when profiling against this specific. Can be empty to use taxonomic profiler defaults. Must not be surrounded by quotes [required]. We generally do not recommend specifying parameters here that turn on/off saving of output files or specifying particular file extensions - this should be already addressed via pipeline parameters. | | `db_path` | Path to the database. Can either be a path to a directory containing the database index files or a `.tar.gz` file which contains the compressed database directory with the same name as the tar archive, minus `.tar.gz` [required]. | From 0de1ac53561d490e35a500b0e18da3e37a913907 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Thu, 27 Oct 2022 10:14:57 +0200 Subject: [PATCH 21/23] docs: add Bracken reference --- CITATIONS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CITATIONS.md b/CITATIONS.md index 1ce4ec2..ec03fc4 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -36,6 +36,10 @@ > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0. +- [Bracken](https://doi.org/10.7717/peerj-cs.104) + + > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104 + - [Krona](https://doi.org/10.1186/1471-2105-12-385) > Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385. From dd1a4ff158bff66021138f4f63f5585a477b6618 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Thu, 27 Oct 2022 11:19:14 +0200 Subject: [PATCH 22/23] refactor: only combine same database name [skip ci] --- subworkflows/local/profiling.nf | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index e6272ac..278e310 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -141,22 +141,25 @@ workflow PROFILING { } if ( params.run_kraken2 && params.run_bracken ) { - - // remove files from 'pure' kraken2 runs, so only those aligned against bracken2 kraken database are taken for brakcen - def ch_input_for_bracken = KRAKEN2_KRAKEN2.out.report + // Remove files from 'pure' kraken2 runs, so only those aligned against Bracken & kraken2 database are used. + def ch_kraken2_output = KRAKEN2_KRAKEN2.out.report .filter { meta, report -> meta['tool'] == 'bracken' } + // If necessary, convert the eight column output to six column output. if (params.kraken2_save_minimizers) { - ch_input_for_bracken = KRAKEN2_STANDARD_REPORT(ch_input_for_bracken).report + ch_kraken2_output = KRAKEN2_STANDARD_REPORT(ch_kraken2_output).report } - ch_input_for_bracken = ch_input_for_bracken - .combine( - databases.filter { meta, db -> - meta['tool'] == 'bracken' - } - ) - .multiMap { meta, report, db_meta, db -> + // Extract the database name to combine by. + def ch_bracken_databases = databases + .filter { meta, db -> meta['tool'] == 'bracken' } + .map { meta, db -> [meta['db_name'], meta, db] } + + // Extract the database name to combine by. + def ch_input_for_bracken = ch_kraken2_output + .map { meta, report -> [meta['db_name'], meta, report] } + .combine(ch_bracken_databases, by: 0) + .multiMap { key, meta, report, db_meta, db -> report: [meta + db_meta, report] db: db } From e658fab430db815bf7614743e3b249c861df15ee Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Thu, 27 Oct 2022 11:50:47 +0200 Subject: [PATCH 23/23] fix: remove `def` No idea why, must have something to do with scope. --- subworkflows/local/profiling.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 278e310..b86c165 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -151,12 +151,12 @@ workflow PROFILING { } // Extract the database name to combine by. - def ch_bracken_databases = databases + ch_bracken_databases = databases .filter { meta, db -> meta['tool'] == 'bracken' } .map { meta, db -> [meta['db_name'], meta, db] } // Extract the database name to combine by. - def ch_input_for_bracken = ch_kraken2_output + ch_input_for_bracken = ch_kraken2_output .map { meta, report -> [meta['db_name'], meta, report] } .combine(ch_bracken_databases, by: 0) .multiMap { key, meta, report, db_meta, db ->