diff --git a/conf/modules.config b/conf/modules.config index 605a7c5..defef95 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -543,7 +543,16 @@ process { } withName: TAXPASTA_MERGE { - ext.args = { "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}" } + ext.args = { + [ + "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}", + params.taxpasta_taxonomy_dir ? "--taxonomy ${params.taxpasta_taxonomy_dir}" : "", + params.taxpasta_add_name ? "--add-name" : "", + params.taxpasta_add_rank ? "--add-rank" : "", + params.taxpasta_add_lineage ? "--add-lineage" : "", + params.taxpasta_add_idlineage ? "--add-id-lineage" : "" + ].join(' ').trim() + } publishDir = [ path: { "${params.outdir}/taxpasta/" }, mode: params.publish_dir_mode, diff --git a/docs/output.md b/docs/output.md index 85b163c..0d35bdd 100644 --- a/docs/output.md +++ b/docs/output.md @@ -449,6 +449,8 @@ The resulting HTML files can be loaded into your web browser for exploration. Ea +By providing the path to a directory containing taxdump files to `--taxpasta_taxonomy_dir`, the taxon name, the taxon rank, the taxon's entire lineage including taxon names and/or the taxon's entire lineage including taxon identifiers can also be added to the output in addition to just the taxon ID. Addition of this extra information can be turned on by using the parameters `--taxpasta_add_name`, `--taxpasta_add_rank`, `--taxpasta_add_lineage` and `--taxpasta_add_idlineage` respectively. + These files will likely be the most useful files for the comparison of differences in classification between different tools or building consensuses, with the caveat they have slightly less information than the actual output from each tool (which may have non-standard information e.g. taxonomic rank, percentage of hits, abundance estimations). 
The following report files are used for the taxpasta step: diff --git a/modules.json b/modules.json index 5b07b07..d1899a4 100644 --- a/modules.json +++ b/modules.json @@ -209,7 +209,7 @@ }, "taxpasta/merge": { "branch": "master", - "git_sha": "74ab450ed05e034d049c00f6e2853de2c31594b4", + "git_sha": "fe58454add6225d2b7468e6d72a3a1f6a3149638", "installed_by": ["modules"] }, "untar": { diff --git a/modules/nf-core/taxpasta/merge/main.nf b/modules/nf-core/taxpasta/merge/main.nf index 67a6f25..46aea29 100644 --- a/modules/nf-core/taxpasta/merge/main.nf +++ b/modules/nf-core/taxpasta/merge/main.nf @@ -2,10 +2,10 @@ process TAXPASTA_MERGE { tag "$meta.id" label 'process_single' - conda "bioconda::taxpasta=0.1.1" + conda "bioconda::taxpasta=0.2.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/taxpasta:0.1.1--pyhdfd78af_0': - 'quay.io/biocontainers/taxpasta:0.1.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/taxpasta:0.2.0--pyhdfd78af_0': + 'quay.io/biocontainers/taxpasta:0.2.0--pyhdfd78af_0' }" input: diff --git a/nextflow.config b/nextflow.config index 9228d67..19ca198 100644 --- a/nextflow.config +++ b/nextflow.config @@ -157,6 +157,11 @@ params { // profile standardisation run_profile_standardisation = false standardisation_taxpasta_format = 'tsv' + taxpasta_taxonomy_dir = null + taxpasta_add_name = false + taxpasta_add_rank = false + taxpasta_add_lineage = false + taxpasta_add_idlineage = false standardisation_motus_generatebiom = false } diff --git a/nextflow_schema.json b/nextflow_schema.json index 3de42d5..2a72303 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -516,6 +516,31 @@ "fa_icon": "fas fa-file", "description": "The desired output format.", "enum": ["tsv", "csv", "arrow", "parquet", "biom"] + }, + "taxpasta_taxonomy_dir": { + "type": "string", + "description": "The path to a directory containing taxdump files.", + 
"help_text": "This argument provides the path to the directory containing taxdump files. At least nodes.dmp and names.dmp are required. A merged.dmp file is optional. \n\nModifies tool parameter(s):\n- taxpasta: `--taxonomy`" + }, + "taxpasta_add_name": { + "type": "boolean", + "description": "Add the taxon name to the output.", + "help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon name can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--add-name`" + }, + "taxpasta_add_rank": { + "type": "boolean", + "description": "Add the taxon rank to the output.", + "help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon rank can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--add-rank`" + }, + "taxpasta_add_lineage": { + "type": "boolean", + "description": "Add the taxon's entire lineage of taxon names to the output.", + "help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon names separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--add-lineage`\n" + }, + "taxpasta_add_idlineage": { + "type": "boolean", + "description": "Add the taxon's entire lineage of taxon identifiers to the output.", + "help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon identifiers separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--add-id-lineage`\n" } }, "fa_icon": "fas fa-chart-line"