1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 03:59:55 +00:00

Merge pull request #248 from genomic-medicine-sweden/update_taxpasta_merge

Update taxpasta/merge to version 0.2.0 and add taxonomy argument
This commit is contained in:
Sofia Stamouli 2023-02-27 15:19:58 +01:00 committed by GitHub
commit b93c7082b8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 46 additions and 5 deletions

View file

@ -543,7 +543,16 @@ process {
}
withName: TAXPASTA_MERGE {
ext.args = { "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}" }
ext.args = {
[
"-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}",
params.taxpasta_taxonomy_dir ? "--taxonomy ${params.taxpasta_taxonomy_dir}" : "",
params.taxpasta_add_name ? "--add-name" : "",
params.taxpasta_add_rank ? "--add-rank" : "",
params.taxpasta_add_lineage ? "--add-lineage" : "",
params.taxpasta_add_idlineage ? "--add-id-lineage" : ""
].join(' ').trim()
}
publishDir = [
path: { "${params.outdir}/taxpasta/" },
mode: params.publish_dir_mode,

View file

@ -449,6 +449,8 @@ The resulting HTML files can be loaded into your web browser for exploration. Ea
</details>
By providing the path to a directory containing taxdump files to `--taxpasta_taxonomy_dir`, the taxon name, the taxon rank, the taxon's entire lineage including taxon names and/or the taxon's entire lineage including taxon identifiers can also be added in the output in addition to just the taxon ID. Addition of this extra information can be turned on by using the parameters `--taxpasta_add_name`, `--taxpasta_add_rank`, `--taxpasta_add_lineage` and `--taxpasta_add_idlineage` respectively.
These files will likely be the most useful files for the comparison of differences in classification between different tools or building consensuses, with the caveat that they have slightly less information than the actual output from each tool (which may have non-standard information e.g. taxonomic rank, percentage of hits, abundance estimations).
The following report files are used for the taxpasta step:

View file

@ -209,7 +209,7 @@
},
"taxpasta/merge": {
"branch": "master",
"git_sha": "74ab450ed05e034d049c00f6e2853de2c31594b4",
"git_sha": "fe58454add6225d2b7468e6d72a3a1f6a3149638",
"installed_by": ["modules"]
},
"untar": {

View file

@ -2,10 +2,10 @@ process TAXPASTA_MERGE {
tag "$meta.id"
label 'process_single'
conda "bioconda::taxpasta=0.1.1"
conda "bioconda::taxpasta=0.2.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/taxpasta:0.1.1--pyhdfd78af_0':
'quay.io/biocontainers/taxpasta:0.1.1--pyhdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/taxpasta:0.2.0--pyhdfd78af_0':
'quay.io/biocontainers/taxpasta:0.2.0--pyhdfd78af_0' }"
input:

View file

@ -157,6 +157,11 @@ params {
// profile standardisation
run_profile_standardisation = false
standardisation_taxpasta_format = 'tsv'
taxpasta_taxonomy_dir = null
taxpasta_add_name = false
taxpasta_add_rank = false
taxpasta_add_lineage = false
taxpasta_add_idlineage = false
standardisation_motus_generatebiom = false
}

View file

@ -516,6 +516,31 @@
"fa_icon": "fas fa-file",
"description": "The desired output format.",
"enum": ["tsv", "csv", "arrow", "parquet", "biom"]
},
"taxpasta_taxonomy_dir": {
"type": "string",
"description": "The path to a directory containing taxdump files.",
"help_text": "This arguments provides the path to the directory containing taxdump files. At least nodes.dmp and names.dmp are required. A merged.dmp file is optional. \n\nModifies tool parameter(s):\n-taxpasta: `--taxpasta_taxonomy_dir`"
},
"taxpasta_add_name": {
"type": "boolean",
"description": "Add the taxon name to the output.",
"help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon name can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_name`"
},
"taxpasta_add_rank": {
"type": "boolean",
"description": "Add the taxon rank to the output.",
"help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon rank can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_rank`"
},
"taxpasta_add_lineage": {
"type": "boolean",
"description": "Add the taxon's entire lineage to the output.",
"help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon names separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_lineage`\n"
},
"taxpasta_add_idlineage": {
"type": "boolean",
"description": "Add the taxon's entire lineage to the output.",
"help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon identifiers separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_idlineage`\n"
}
},
"fa_icon": "fas fa-chart-line"