Merge pull request #248 from genomic-medicine-sweden/update_taxpasta_merge

Update taxpasta/merge to version 0.2.0 and add taxonomy argument
2024-11-22 02:36:03 +00:00 · 2023-02-27 15:19:58 +01:00 · 2023-02-27 15:19:58 +01:00 · b93c7082b8
commit b93c7082b8
parent 78c2d275e6 df5303270d
6 changed files with 46 additions and 5 deletions
--- a/conf/modules.config
+++ b/conf/modules.config
@ -543,7 +543,16 @@ process {
    }

    withName: TAXPASTA_MERGE {
-        ext.args =  { "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}" }
+        ext.args =  {
+            [
+                "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}",
+                params.taxpasta_taxonomy_dir ? "--taxonomy ${params.taxpasta_taxonomy_dir}" : "",
+                params.taxpasta_add_name ?  "--add-name" : "",
+                params.taxpasta_add_rank ? "--add-rank" : "",
+                params.taxpasta_add_lineage ? "--add-lineage" : "",
+                params.taxpasta_add_idlineage ? "--add-id-lineage" : ""
+            ].join(' ').trim()
+            }
        publishDir = [
            path: { "${params.outdir}/taxpasta/" },
            mode: params.publish_dir_mode,
--- a/docs/output.md
+++ b/docs/output.md
@ -449,6 +449,8 @@ The resulting HTML files can be loaded into your web browser for exploration. Ea

  </details>

+By providing the path to a directory containing taxdump files to `--taxpasta_taxonomy_dir`, the taxon name, the taxon rank, the taxon's entire lineage including taxon names and/or the taxon's entire lineage including taxon identifiers can also be added to the output in addition to just the taxon ID. Addition of this extra information can be turned on by using the parameters `--taxpasta_add_name`, `--taxpasta_add_rank`, `--taxpasta_add_lineage` and `--taxpasta_add_idlineage`, respectively.
+
 These files will likely be the most useful files for the comparison of differences in classification between different tools or building consensuses, with the caveat they have slightly less information than the actual output from each tool (which may have non-standard information e.g. taxonomic rank, percentage of hits, abundance estimations).

 The following report files are used for the taxpasta step:
--- a/modules.json
+++ b/modules.json
@ -209,7 +209,7 @@
                    },
                    "taxpasta/merge": {
                        "branch": "master",
-                        "git_sha": "74ab450ed05e034d049c00f6e2853de2c31594b4",
+                        "git_sha": "fe58454add6225d2b7468e6d72a3a1f6a3149638",
                        "installed_by": ["modules"]
                    },
                    "untar": {
--- a/modules/nf-core/taxpasta/merge/main.nf
+++ b/modules/nf-core/taxpasta/merge/main.nf
@ -2,10 +2,10 @@ process TAXPASTA_MERGE {
    tag "$meta.id"
    label 'process_single'

-    conda "bioconda::taxpasta=0.1.1"
+    conda "bioconda::taxpasta=0.2.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/taxpasta:0.1.1--pyhdfd78af_0':
-        'quay.io/biocontainers/taxpasta:0.1.1--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/taxpasta:0.2.0--pyhdfd78af_0':
+        'quay.io/biocontainers/taxpasta:0.2.0--pyhdfd78af_0' }"


    input:
--- a/nextflow.config
+++ b/nextflow.config
@ -157,6 +157,11 @@ params {
    // profile standardisation
    run_profile_standardisation             = false
    standardisation_taxpasta_format         = 'tsv'
+    taxpasta_taxonomy_dir                   = null
+    taxpasta_add_name                       = false
+    taxpasta_add_rank                       = false
+    taxpasta_add_lineage                    = false
+    taxpasta_add_idlineage                  = false
    standardisation_motus_generatebiom      = false
 }

--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@ -516,6 +516,31 @@
                    "fa_icon": "fas fa-file",
                    "description": "The desired output format.",
                    "enum": ["tsv", "csv", "arrow", "parquet", "biom"]
+                },
+                "taxpasta_taxonomy_dir": {
+                    "type": "string",
+                    "description": "The path to a directory containing taxdump files.",
+                    "help_text": "This argument provides the path to the directory containing taxdump files. At least nodes.dmp and names.dmp are required. A merged.dmp file is optional.\n\nModifies tool parameter(s):\n- taxpasta: `--taxonomy`"
+                },
+                "taxpasta_add_name": {
+                    "type": "boolean",
+                    "description": "Add the taxon name to the output.",
+                    "help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon name can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--add-name`"
+                },
+                "taxpasta_add_rank": {
+                    "type": "boolean",
+                    "description": "Add the taxon rank to the output.",
+                    "help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon rank can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--add-rank`"
+                },
+                "taxpasta_add_lineage": {
+                    "type": "boolean",
+                    "description": "Add the taxon's entire lineage to the output.",
+                    "help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon names separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--add-lineage`\n"
+                },
+                "taxpasta_add_idlineage": {
+                    "type": "boolean",
+                    "description": "Add the taxon's entire lineage, given as taxon identifiers, to the output.",
+                    "help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon identifiers separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--add-id-lineage`\n"
                }
            },
            "fa_icon": "fas fa-chart-line"