1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 03:02:04 +00:00

Add taxpasta_merge to taxprofiler

This commit is contained in:
sofstam 2023-02-16 14:29:52 +01:00
parent 21d9135b14
commit de6a4214ef
8 changed files with 302 additions and 47 deletions

View file

@ -533,6 +533,21 @@ process {
]
}
// taxpasta merge: one standardised table per (tool, database) group.
withName: TAXPASTA_MERGE {
    ext.args = {
        // Only the options the module cannot derive on its own are set here:
        //   -p  the profiler that produced the input profiles
        //   -o  the output file; taxpasta infers the format from its extension
        //
        // `--taxonomy <dir>` and `-s <samplesheet>` are appended by the module
        // itself whenever its `taxonomy` / `samplesheet` inputs are provided, so
        // they must not be repeated here. A bare `-p` would be a second,
        // valueless profiler flag, and a bare `-s` would be missing the file
        // argument the module passes with it.
        // NOTE(review): params.taxpasta_add_taxonomy / params.taxpasta_add_samplesheet
        // therefore need to be wired to the module inputs in the calling
        // workflow rather than to this argument string.
        [
            "-p ${meta.tool}",
            "-o ${meta.tool}_${meta.id}.${params.taxpasta_standardisation_format}"
        ].join(' ').trim()
    }
    publishDir = [
        path: { "${params.outdir}/taxpasta/" },
        mode: params.publish_dir_mode,
        // Same set of extensions as the module's declared output glob.
        pattern: '*.{tsv,csv,arrow,parquet,biom}'
    ]
}
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },

View file

@ -25,7 +25,7 @@ params {
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_mergepairs = true
perform_shortread_complexityfilter = true
perform_shortread_complexityfilter = false
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
@ -44,7 +44,7 @@ params {
malt_save_reads = true
kraken2_save_reads = true
centrifuge_save_reads = true
diamond_save_reads = true
run_profile_standardisation = true
}
process {

View file

@ -8,209 +8,298 @@
"adapterremoval": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bbmap/bbduk": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bowtie2/align": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bowtie2/build": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bracken/bracken": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"bracken/combinebrackenoutputs": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"cat/fastq": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"centrifuge/centrifuge": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"centrifuge/kreport": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"diamond/blastx": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"falco": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"],
"installed_by": [
"modules"
],
"patch": "modules/nf-core/falco/falco.diff"
},
"fastp": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"fastqc": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"filtlong": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"gunzip": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"kaiju/kaiju": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"kaiju/kaiju2krona": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"kaiju/kaiju2table": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"kraken2/kraken2": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krakentools/combinekreports": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krakentools/kreport2krona": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krakenuniq/preloadedkrakenuniq": {
"branch": "master",
"git_sha": "a6eb17f65b3ee5761c25c075a6166c9f76733cee",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krona/ktimporttaxonomy": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"krona/ktimporttext": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"malt/run": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"megan/rma2info": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"metaphlan3/mergemetaphlantables": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"metaphlan3/metaphlan3": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"minimap2/align": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"minimap2/index": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"motus/merge": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"motus/profile": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"multiqc": {
"branch": "master",
"git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"porechop/porechop": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"],
"installed_by": [
"modules"
],
"patch": "modules/nf-core/porechop/porechop/porechop-porechop.diff"
},
"prinseqplusplus": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/bam2fq": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/index": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/stats": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/view": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"taxpasta/merge": {
"branch": "master",
"git_sha": "74ab450ed05e034d049c00f6e2853de2c31594b4",
"installed_by": [
"modules"
]
},
"untar": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
}
}
}

47
modules/nf-core/taxpasta/merge/main.nf generated Normal file
View file

@ -0,0 +1,47 @@
// Standardise and merge several taxonomic profiles into a single table with
// `taxpasta merge`.
//
// Inputs:
//   meta, profiles - metadata map and the profile files to merge
//   taxonomy       - optional; when set it is passed on as `--taxonomy`
//   samplesheet    - optional; when set it is passed on as `-s`
// Outputs:
//   merged_profiles - the merged table (tsv, csv, arrow, parquet or biom)
//   versions        - versions.yml recording the taxpasta version
process TAXPASTA_MERGE {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::taxpasta=0.1.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/taxpasta:0.1.1--pyhdfd78af_0':
        'quay.io/biocontainers/taxpasta:0.1.1--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(profiles)
    path taxonomy
    path samplesheet

    output:
    tuple val(meta), path("*.{tsv,csv,arrow,parquet,biom}"), emit: merged_profiles
    path "versions.yml"                                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // N.B.: Taxpasta requires a --profiler option and will fail without it.
    // This must be specified via a `nextflow.config` or `modules.config`, for
    // example, as "--profiler kraken2". Additionally, it requires a --output
    // option with the output file name, which must also be supplied through
    // `ext.args`. The desired format will be parsed from the name and should
    // correspond to the output pattern specified above, e.g.,
    // "--output merged.tsv".
    def args = task.ext.args ?: ''
    // Optional inputs: an empty value (e.g. `[]`) is falsy and adds no flag.
    def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : ''
    def samplesheet_input = samplesheet ? "-s ${samplesheet}" : ''
    """
    taxpasta merge \\
        $args \\
        $taxonomy_option \\
        $samplesheet_input \\
        $profiles

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        taxpasta: \$(taxpasta --version)
    END_VERSIONS
    """
}

58
modules/nf-core/taxpasta/merge/meta.yml generated Normal file
View file

@ -0,0 +1,58 @@
# Module description for TAXPASTA_MERGE (modules/nf-core/taxpasta/merge/main.nf).
name: "taxpasta_merge"
description: Standardise and merge two or more taxonomic profiles into a single table
keywords:
  - taxonomic profile
  - standardise
  - standardisation
  - metagenomics
  - taxonomic profiling
  - otu tables
  - taxon tables
tools:
  - "taxpasta":
      description: "TAXonomic Profile Aggregation and STAndardisation"
      homepage: "https://taxpasta.readthedocs.io/"
      documentation: "https://taxpasta.readthedocs.io/"
      tool_dev_url: "https://github.com/taxprofiler/taxpasta"
      doi: ""
      licence: ["Apache-2.0"]

# Inputs are listed in the same order as the `input:` block of main.nf.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - profiles:
      type: file
      description: A list of taxonomic profiler output files (typically in text format, mandatory)
      pattern: "*.{tsv,csv,arrow,parquet,biom}"
  - taxonomy:
      type: directory
      description: Directory containing at a minimum nodes.dmp and names.dmp files (optional)
      pattern: "*/"
  - samplesheet:
      type: file
      description:
        A samplesheet describing the sample name and a filepath to a taxonomic abundance profile that needs to be relative
        from the work environment. The profiles must be provided even if you give a samplesheet as argument (optional)
      pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - merged_profiles:
      type: file
      description: Output file in which all input profiles have been standardised and combined into a single table.
      # Kept identical to the output glob declared in main.nf.
      pattern: "*.{tsv,csv,arrow,parquet,biom}"

authors:
  - "@sofstam"
  - "@jfy133"

View file

@ -156,6 +156,9 @@ params {
// profile standardisation
run_profile_standardisation = false
taxpasta_add_taxonomy = false
taxpasta_add_samplesheet = false
taxpasta_standardisation_format = 'tsv'
generate_biom_output = false
}
@ -242,6 +245,15 @@ profiles {
executor.cpus = 16
executor.memory = 60.GB
}
// Profile that loads its settings from conf/hasta.config.
// NOTE(review): conf/hasta.config is not visible from here — confirm the file
// is present in the repository before relying on this profile.
hasta {
includeConfig 'conf/hasta.config'
}
// Profile that sets the `priority` and `clusterOptions` params.
// NOTE(review): `--qos=low` looks like a scheduler-specific option and these
// two profiles look unrelated to the taxpasta change — confirm they were meant
// to be part of this commit.
dev_priority {
params {
priority = 'development'
clusterOptions = "--qos=low"
}
}
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
test_noprofiling { includeConfig 'conf/test_noprofiling.config' }

View file

@ -767,5 +767,19 @@
{
"$ref": "#/definitions/reference_genome_options"
}
]
],
"properties": {
    "taxpasta_add_taxonomy": {
        "type": "boolean",
        "description": "Add taxonomy information to the taxpasta-standardised profile."
    },
    "taxpasta_add_samplesheet": {
        "type": "boolean",
        "description": "Provide a samplesheet to taxpasta when merging profiles."
    },
    "taxpasta_standardisation_format": {
        "type": "string",
        "default": "tsv",
        "description": "File format of the taxpasta-standardised output table.",
        "enum": ["tsv", "csv", "arrow", "parquet", "biom"]
    }
}
}

View file

@ -8,6 +8,7 @@ include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
include { MOTUS_MERGE } from '../../modules/nf-core/motus/merge/main'
include { TAXPASTA_MERGE } from '../../modules/nf-core/taxpasta/merge/main'
workflow STANDARDISATION_PROFILES {
take:
@ -21,6 +22,20 @@ workflow STANDARDISATION_PROFILES {
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
// Taxpasta standardisation
// Reduce each profile's meta map to the two keys used for grouping and for
// building the taxpasta arguments: `id` (the database name) and `tool`.
ch_input_for_taxpasta = profiles
.map {
meta, profile ->
def meta_new = [:]
meta_new.id = meta.db_name
// Two tool names are mapped to a different name for taxpasta:
// 'metaphlan3' -> 'metaphlan' and 'malt' -> 'megan6'; all others pass through.
meta_new.tool = meta.tool == 'metaphlan3' ? 'metaphlan' : meta.tool == 'malt' ? 'megan6' : meta.tool
[meta_new, profile]
}
// Collect all profiles that share the same (id, tool) meta map into one list.
.groupTuple ()
// Taxonomy and samplesheet inputs are passed as empty lists, i.e. not provided.
// NOTE(review): the module is described as merging "two or more" profiles —
// confirm how a group containing a single profile should be handled.
TAXPASTA_MERGE (ch_input_for_taxpasta, [], [])
/*
Split profile results based on tool they come from
*/
@ -74,6 +89,8 @@ workflow STANDARDISATION_PROFILES {
[[id:it[0]], it[1]]
}
KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge )
ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
@ -125,6 +142,8 @@ workflow STANDARDISATION_PROFILES {
ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.txt )
ch_versions = ch_versions.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.versions )
ch_standardised_tables.dump (tag: 'standardised')
// mOTUs
// mOTUs has a 'single' database, and cannot create custom ones.
@ -149,6 +168,7 @@ workflow STANDARDISATION_PROFILES {
emit:
tables = ch_standardised_tables
taxpasta = TAXPASTA_MERGE.out.merged_profiles
versions = ch_versions
mqc = ch_multiqc_files
}