From de6a4214ef78a53d0d39f0226a8c0b65afeecd32 Mon Sep 17 00:00:00 2001 From: sofstam Date: Thu, 16 Feb 2023 14:29:52 +0100 Subject: [PATCH] Add taxpasta_merge to taxprofiler --- conf/modules.config | 15 ++ conf/test.config | 4 +- modules.json | 173 +++++++++++++----- modules/nf-core/taxpasta/merge/main.nf | 47 +++++ modules/nf-core/taxpasta/merge/meta.yml | 58 ++++++ nextflow.config | 16 +- nextflow_schema.json | 16 +- .../local/standardisation_profiles.nf | 20 ++ 8 files changed, 302 insertions(+), 47 deletions(-) create mode 100644 modules/nf-core/taxpasta/merge/main.nf create mode 100644 modules/nf-core/taxpasta/merge/meta.yml diff --git a/conf/modules.config b/conf/modules.config index 8ef8728..51b27ad 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -533,6 +533,21 @@ process { ] } + withName: TAXPASTA_MERGE { + ext.args = { + [ + "-p ${meta.tool}", + "-o ${meta.tool}_${meta.id}.${params.taxpasta_standardisation_format}" +
// --taxonomy and -s each take a path; the module appends them from its taxonomy/samplesheet inputs + ].join(' ') + } + publishDir = [ + path: { "${params.outdir}/taxpasta/" }, + mode: params.publish_dir_mode, + pattern: '*.{tsv,csv,arrow,parquet,biom}' + ] + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, diff --git a/conf/test.config b/conf/test.config index cfd371a..682d087 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,7 +25,7 @@ params { perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true - perform_shortread_complexityfilter = true + perform_shortread_complexityfilter = false perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true @@ -44,7 +44,7 @@ params { malt_save_reads = true kraken2_save_reads = true centrifuge_save_reads = true - diamond_save_reads = true + run_profile_standardisation = true } process { diff --git a/modules.json b/modules.json index 2375da6..9b87f43 100644 --- a/modules.json +++ b/modules.json @@ -8,212 +8,301 @@ "adapterremoval": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bbmap/bbduk": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bowtie2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bowtie2/build": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bracken/bracken": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bracken/combinebrackenoutputs": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] +
"installed_by": [ + "modules" + ] }, "cat/fastq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "centrifuge/centrifuge": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "centrifuge/kreport": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "diamond/blastx": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "falco": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/falco/falco.diff" }, "fastp": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "filtlong": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kaiju/kaiju": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kaiju/kaiju2krona": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "kaiju/kaiju2table": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kraken2/kraken2": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krakentools/combinekreports": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krakentools/kreport2krona": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krakenuniq/preloadedkrakenuniq": { "branch": "master", "git_sha": "a6eb17f65b3ee5761c25c075a6166c9f76733cee", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktimporttext": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "malt/run": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "megan/rma2info": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "metaphlan3/mergemetaphlantables": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "metaphlan3/metaphlan3": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "minimap2/align": { "branch": "master", "git_sha": 
"c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "minimap2/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "motus/merge": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "motus/profile": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "porechop/porechop": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/porechop/porechop/porechop-porechop.diff" }, "prinseqplusplus": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/bam2fq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "taxpasta/merge": { + "branch": "master", + "git_sha": "74ab450ed05e034d049c00f6e2853de2c31594b4", + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": 
"c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/taxpasta/merge/main.nf b/modules/nf-core/taxpasta/merge/main.nf new file mode 100644 index 0000000..67a6f25 --- /dev/null +++ b/modules/nf-core/taxpasta/merge/main.nf @@ -0,0 +1,47 @@ +process TAXPASTA_MERGE { + tag "$meta.id" + label 'process_single' + + conda "bioconda::taxpasta=0.1.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/taxpasta:0.1.1--pyhdfd78af_0': + 'quay.io/biocontainers/taxpasta:0.1.1--pyhdfd78af_0' }" + + + input: + tuple val(meta), path(profiles) + path taxonomy + path samplesheet + + output: + tuple val(meta), path("*.{tsv,csv,arrow,parquet,biom}"), emit: merged_profiles + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // N.B.: Taxpasta requires a --profiler option and will fail without it. + // This must be specified via a `nextflow.config` or `modules.config`, for + // example, as "--profiler kraken2". Additionally, it requires a --output + // option with the output file name. The desired format will be parsed from + // the name and should correspond to the output pattern specified above, + // e.g., "--output ${task.ext.prefix}.tsv". + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : '' + def samplesheet_input = samplesheet ? 
"-s ${samplesheet}" : '' + """ + taxpasta merge \\ + $args \\ + $taxonomy_option \\ + $samplesheet_input \\ + $profiles + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + taxpasta: \$(taxpasta --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/taxpasta/merge/meta.yml b/modules/nf-core/taxpasta/merge/meta.yml new file mode 100644 index 0000000..79d301f --- /dev/null +++ b/modules/nf-core/taxpasta/merge/meta.yml @@ -0,0 +1,58 @@ +name: "taxpasta_merge" +description: Standardise and merge two or more taxonomic profiles into a single table +keywords: + - taxonomic profile + - standardise + - standardisation + - metagenomics + - taxonomic profiling + - otu tables + - taxon tables +tools: + - "taxpasta": + description: "TAXonomic Profile Aggregation and STAndardisation" + homepage: "https://taxpasta.readthedocs.io/" + documentation: "https://taxpasta.readthedocs.io/" + tool_dev_url: "https://github.com/taxprofiler/taxpasta" + doi: "" + licence: "['Apache-2.0']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - profiles: + type: file + description: A list of taxonomic profiler output files (typically in text format, mandatory) + pattern: "*.{tsv,csv,arrow,parquet,biom}" + - samplesheet: + type: file + description: + A samplesheet describing the sample name and a filepath to a taxonomic abundance profile that needs to be relative + from the work environment. The profiles must be provided even if you give a samplesheet as argument (optional) + pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet}" + - taxonomy: + type: directory + description: Directory containing at a minimum nodes.dmp and names.dmp files (optional) + pattern: "*/" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - merged_profiles: + type: file + description: Output file with standardised multiple profiles in one go and have all profiles combined into a single table. + pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet,biom}" + +authors: + - "@sofstam" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 45a3cc7..52a72db 100644 --- a/nextflow.config +++ b/nextflow.config @@ -155,8 +155,11 @@ params { krona_taxonomy_directory = null // profile standardisation - run_profile_standardisation = false - generate_biom_output = false + run_profile_standardisation = false + taxpasta_add_taxonomy = false + taxpasta_add_samplesheet = false + taxpasta_standardisation_format = 'tsv' + generate_biom_output = false } // Load base.config by default for all pipelines @@ -242,6 +245,15 @@ profiles { executor.cpus = 16 executor.memory = 60.GB } + hasta { + includeConfig 'conf/hasta.config' + } + dev_priority { + params { + priority = 'development' + clusterOptions = "--qos=low" + } + } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } test_noprofiling { includeConfig 'conf/test_noprofiling.config' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 89cad56..a84a232 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -767,5 +767,19 @@ { "$ref": "#/definitions/reference_genome_options" } - ] + ], + "properties": { + "taxpasta_add_taxonomy": { + "type": "boolean", + "default": false + }, + "taxpasta_add_samplesheet": { + "type": "boolean", + "default": false + }, + "taxpasta_standardisation_format": { + "type": "string", + "default": "tsv" + } + } } diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 582aaed..0a416a9 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -8,6 +8,7
@@ include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main' include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main' include { MOTUS_MERGE } from '../../modules/nf-core/motus/merge/main' +include { TAXPASTA_MERGE } from '../../modules/nf-core/taxpasta/merge/main' workflow STANDARDISATION_PROFILES { take: @@ -21,6 +22,20 @@ workflow STANDARDISATION_PROFILES { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + // Taxpasta standardisation: profiles are grouped per tool and database; 'taxpasta merge' needs two or more profiles, so single-profile groups are skipped + ch_input_for_taxpasta = profiles + .map { + meta, profile -> + def meta_new = [:] + meta_new.id = meta.db_name + meta_new.tool = meta.tool == 'metaphlan3' ? 'metaphlan' : meta.tool == 'malt' ? 'megan6' : meta.tool + [meta_new, profile] + } + .groupTuple () + .filter { meta, grouped -> grouped.size() > 1 } + TAXPASTA_MERGE (ch_input_for_taxpasta, [], []) + ch_versions = ch_versions.mix( TAXPASTA_MERGE.out.versions ) + /* Split profile results based on tool they come from */ @@ -74,6 +89,8 @@ workflow STANDARDISATION_PROFILES { [[id:it[0]], it[1]] } + + KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge ) ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt ) @@ -125,6 +142,8 @@ workflow STANDARDISATION_PROFILES { ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.txt ) ch_versions = ch_versions.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.versions ) + ch_standardised_tables.dump (tag: 'standardised') + // mOTUs // mOTUs has a 'single' database, and cannot create custom ones. @@ -149,6 +168,7 @@ workflow STANDARDISATION_PROFILES { emit: tables = ch_standardised_tables + taxpasta = TAXPASTA_MERGE.out.merged_profiles versions = ch_versions mqc = ch_multiqc_files }