From d082d67bcb6e55dfcd46996b97a947819e86b3c7 Mon Sep 17 00:00:00 2001 From: sofstam Date: Mon, 25 Apr 2022 17:36:29 +0200 Subject: [PATCH 1/4] Add kaiju2table to taxprofiler --- conf/modules.config | 10 ++++ modules.json | 5 +- .../nf-core/modules/kaiju/kaiju2table/main.nf | 40 +++++++++++++++ .../modules/kaiju/kaiju2table/meta.yml | 50 +++++++++++++++++++ nextflow.config | 1 + nextflow_schema.json | 30 +++++++++-- subworkflows/local/profiling.nf | 6 ++- 7 files changed, 135 insertions(+), 7 deletions(-) create mode 100644 modules/nf-core/modules/kaiju/kaiju2table/main.nf create mode 100644 modules/nf-core/modules/kaiju/kaiju2table/meta.yml diff --git a/conf/modules.config b/conf/modules.config index 5c5de54..a72561d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -289,4 +289,14 @@ process { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } + + withName: KAIJU_KAIJU2TABLE { + ext.args = { "${meta.db_params}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + publishDir = [ + path: { "${params.outdir}/kaiju/${meta.db_name}" }, + mode: params.publish_dir_mode, + pattern: '*.{txt}' + ] + } } diff --git a/modules.json b/modules.json index e9c1310..9beb010 100644 --- a/modules.json +++ b/modules.json @@ -36,6 +36,9 @@ "kaiju/kaiju": { "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe" }, + "kaiju/kaiju2table": { + "git_sha": "538dbac98ba9c8f799536cd5a617195501439457" + }, "kraken2/kraken2": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -62,4 +65,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/kaiju/kaiju2table/main.nf b/modules/nf-core/modules/kaiju/kaiju2table/main.nf new file mode 100644 index 0000000..00739d1 --- /dev/null +++ b/modules/nf-core/modules/kaiju/kaiju2table/main.nf @@ -0,0 +1,40 @@ +process KAIJU_KAIJU2TABLE { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1': + 'quay.io/biocontainers/kaiju:1.8.2--h2e03b76_0' }" + + input: + tuple val(meta), path(results) + path db + val taxon_rank + + output: + tuple val(meta), path('*.txt'), emit: summary + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + dbnodes=`find -L ${db} -name "*nodes.dmp"` + dbname=`find -L ${db} -name "*.fmi" -not -name "._*"` + kaiju2table $args \\ + -t \$dbnodes \\ + -n \$dbname \\ + -r ${taxon_rank} \\ + -o ${prefix}.txt \\ + ${results} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/kaiju/kaiju2table/meta.yml b/modules/nf-core/modules/kaiju/kaiju2table/meta.yml new file mode 100644 index 0000000..bc3e85d --- /dev/null +++ b/modules/nf-core/modules/kaiju/kaiju2table/meta.yml @@ -0,0 +1,50 @@ +name: "kaiju_kaiju2table" +description: write your description here +keywords: + - classify + - metagenomics +tools: + - kaiju: + description: Fast and sensitive taxonomic classification for metagenomics + homepage: https://kaiju.binf.ku.dk/ + documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md + tool_dev_url: https://github.com/bioinformatics-centre/kaiju + doi: "10.1038/ncomms11257" + licence: ["GNU GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - results: + type: file + description: File containing the kaiju classification results + pattern: "*.{txt}" + - taxon_rank: + type: string + description: | + Taxonomic rank to display in report + pattern: "phylum|class|order|family|genus|species" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - results: + type: file + description: | + Summary table for a given taxonomic rank + pattern: "*.{tsv}" + +authors: + - "@sofstam" + - "@talnor" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 43aaae8..909da25 100644 --- a/nextflow.config +++ b/nextflow.config @@ -107,6 +107,7 @@ params { // kaiju run_kaiju = false + kaiju_taxon_name = 'species' } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 2181cce..9516b6a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -173,7 +176,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -294,7 +304,10 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"] + "enum": [ + "fastp", + "adapterremoval" + ] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -335,7 +348,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"] + "enum": [ + "entropy", + "dust" + ] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -384,6 +400,10 @@ }, "malt_generatemegansummary": { "type": "boolean" + }, + "kaiju_taxon_name": { + "type": "string", + "default": "species" } } -} +} \ No newline at end of file diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index fcb2678..1f1d4da 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -9,6 +9,7 @@ include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/cent include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/modules/centrifuge/kreport/main' include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main' include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main' +include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main' workflow PROFILING { take: @@ -155,8 +156,11 @@ workflow PROFILING { } if ( params.run_kaiju ) { - KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db ) + KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db) + KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name) + ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary ) } emit: From f4480feb9bb5f76a7e12f84682d512d2932022d2 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Mon, 25 Apr 2022 17:45:26 +0200 Subject: [PATCH 2/4] Prettier --- modules.json | 2 +- nextflow_schema.json | 26 +++++--------------------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/modules.json b/modules.json index 9beb010..ffcde5d 100644 --- a/modules.json +++ b/modules.json @@ -65,4 +65,4 @@ } } } -} \ No newline at end of file +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 9516b6a..93450d2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -304,10 +294,7 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -348,10 +335,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ] + "enum": ["entropy", "dust"] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -406,4 +390,4 @@ "default": "species" } } -} \ No newline at end of file +} From 7afda265c5c99149739a62dfc0449aad20582184 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Tue, 26 Apr 2022 11:27:18 +0200 Subject: [PATCH 3/4] Enumerating taxon --- nextflow_schema.json | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 93450d2..2fda6e1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -173,7 +176,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -294,7 +304,10 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"] + "enum": [ + "fastp", + "adapterremoval" + ] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -335,7 +348,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"] + "enum": [ + "entropy", + "dust" + ] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -387,7 +403,15 @@ }, "kaiju_taxon_name": { "type": "string", - "default": "species" + "default": "species", + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ] } } -} +} \ No newline at end of file From 194b89e66a32862d234ef39cc36aad46c97a2da7 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Tue, 26 Apr 2022 12:53:39 +0200 Subject: [PATCH 4/4] Prettier --- nextflow_schema.json | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 2fda6e1..83793e8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -304,10 +294,7 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -348,10 +335,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ] + "enum": ["entropy", "dust"] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -404,14 +388,7 @@ "kaiju_taxon_name": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ] + "enum": ["phylum", "class", "order", "family", "genus", "species"] } } -} \ No newline at end of file +}