From 68d0cc00faaa50c45ec77a25158f3749d22d4f95 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Mon, 23 May 2022 08:05:06 -0400 Subject: [PATCH] add motus profile. --- conf/test.config | 1 + conf/test_nopreprocessing.config | 1 + conf/test_noprofiling.config | 1 + modules.json | 8 ++- .../nf-core/modules/motus/downloaddb/main.nf | 39 ++++++++++++ .../nf-core/modules/motus/downloaddb/meta.yml | 39 ++++++++++++ modules/nf-core/modules/motus/profile/main.nf | 54 ++++++++++++++++ .../nf-core/modules/motus/profile/meta.yml | 61 +++++++++++++++++++ nextflow.config | 5 ++ nextflow_schema.json | 11 ++++ .../execution_trace_2022-05-21_11-05-12.txt | 1 + subworkflows/local/db_check.nf | 21 ++++++- subworkflows/local/profiling.nf | 19 +++++- 13 files changed, 256 insertions(+), 5 deletions(-) create mode 100644 modules/nf-core/modules/motus/downloaddb/main.nf create mode 100644 modules/nf-core/modules/motus/downloaddb/meta.yml create mode 100644 modules/nf-core/modules/motus/profile/main.nf create mode 100644 modules/nf-core/modules/motus/profile/meta.yml create mode 100644 null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt diff --git a/conf/test.config b/conf/test.config index c687a86..cf983ab 100644 --- a/conf/test.config +++ b/conf/test.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_motus = true } process { diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index e8d4ed9..7658a2d 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_motus = true } process { diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index f908651..dffb44e 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = false run_centrifuge = false run_diamond = false + run_motus = false } process { diff 
--git a/modules.json b/modules.json index a55c88b..2fc7203 100644 --- a/modules.json +++ b/modules.json @@ -60,6 +60,12 @@ "minimap2/index": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "motus/downloaddb": { + "git_sha": "6393a085c5fcea11963774c041808df169907487" + }, + "motus/profile": { + "git_sha": "6b960f0e75bbb4d5bd301cd3875fa078d0eab4d1" + }, "multiqc": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -80,4 +86,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/motus/downloaddb/main.nf b/modules/nf-core/modules/motus/downloaddb/main.nf new file mode 100644 index 0000000..317624b --- /dev/null +++ b/modules/nf-core/modules/motus/downloaddb/main.nf @@ -0,0 +1,39 @@ +process MOTUS_DOWNLOADDB { + label 'process_low' + + conda (params.enable_conda ? "bioconda::motus=3.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0': + 'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }" + + input: + path motus_downloaddb_script + + output: + path "db_mOTU/" , emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def software = "${motus_downloaddb_script.simpleName}_copy.py" + """ + ## must copy script file to working directory, + ## otherwise the reference_db will be download to bin folder + ## other than current directory + cp $motus_downloaddb_script ${software} + python ${software} \\ + $args \\ + -t $task.cpus + + ## mOTUs version number is not available from command line. + ## mOTUs save the version number in index database folder. + ## mOTUs will check the database version is same version as exec version. 
+ cat <<-END_VERSIONS > versions.yml + "${task.process}": + mOTUs: \$(grep motus db_mOTU/db_mOTU_versions | sed 's/motus\\t//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/motus/downloaddb/meta.yml b/modules/nf-core/modules/motus/downloaddb/meta.yml new file mode 100644 index 0000000..64df5ee --- /dev/null +++ b/modules/nf-core/modules/motus/downloaddb/meta.yml @@ -0,0 +1,39 @@ +name: "motus_downloaddb" +description: Download the mOTUs database +keywords: + - classify + - metagenomics + - fastq + - taxonomic profiling + - database + - download +tools: + - "motus": + description: "The mOTU profiler is a computational tool that estimates relative taxonomic abundance of known and currently unknown microbial community members using metagenomic shotgun sequencing data." + homepage: "https://motu-tool.org/" + documentation: "https://github.com/motu-tool/mOTUs/wiki" + tool_dev_url: "https://github.com/motu-tool/mOTUs" + doi: "10.1038/s41467-019-08844-4" + licence: "['GPL v3']" + +input: + - motus_downloaddb_script: + type: file + description: | + The mOTUs downloadDB script source file. + Either the copy shipped with a local mOTUs installation or a + remote copy on GitHub, such as https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py + pattern: "downloadDB.py" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - db: + type: directory + description: The mOTUs database directory + pattern: "db_mOTU" + +authors: + - "@jianhong" diff --git a/modules/nf-core/modules/motus/profile/main.nf b/modules/nf-core/modules/motus/profile/main.nf new file mode 100644 index 0000000..6a1acd3 --- /dev/null +++ b/modules/nf-core/modules/motus/profile/main.nf @@ -0,0 +1,54 @@ +process MOTUS_PROFILE { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::motus=3.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0': + 'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(reads) + path db + + output: + tuple val(meta), path("*.out"), emit: out + tuple val(meta), path("*.bam"), optional: true, emit: bam + tuple val(meta), path("*.mgc"), optional: true, emit: mgc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def inputs = reads[0].getExtension() == 'bam' ? + "-i ${reads}" : + reads[0].getExtension() == 'mgc' ? "-m $reads" : + meta.single_end ? + "-s $reads" : "-f ${reads[0]} -r ${reads[1]}" + def refdb = db ? "-db ${db}" : "" + """ + motus profile \\ + $args \\ + $inputs \\ + $refdb \\ + -t $task.cpus \\ + -n $prefix \\ + -o ${prefix}.out + + ## mOTUs version number is not available from command line. + ## mOTUs save the version number in index database folder. + ## mOTUs will check the database version is same version as exec version. 
+ if [ "$db" == "" ]; then + VERSION=\$(echo \$(motus -h 2>&1) | sed 's/^.*Version: //; s/References.*\$//') + else + VERSION=\$(grep motus $db/db_mOTU_versions | sed 's/motus\\t//g') + fi + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mOTUs: \$VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/motus/profile/meta.yml b/modules/nf-core/modules/motus/profile/meta.yml new file mode 100644 index 0000000..19803bd --- /dev/null +++ b/modules/nf-core/modules/motus/profile/meta.yml @@ -0,0 +1,61 @@ +name: "motus_profile" +description: Taxonomic meta-omics profiling using universal marker genes +keywords: + - classify + - metagenomics + - fastq + - taxonomic profiling +tools: + - "motus": + description: "Marker gene-based OTU (mOTU) profiling" + homepage: "https://motu-tool.org/" + documentation: "https://github.com/motu-tool/mOTUs/wiki" + tool_dev_url: "https://github.com/motu-tool/mOTUs" + doi: "10.1038/s41467-019-08844-4" + licence: "['GPL v3']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data, + respectively. + Or the intermediate bam file mapped by bwa to the mOTUs database or + the output bam file from motus profile. + Or the intermediate mgc read counts table. + pattern: "*.{fastq,fq,fasta,fa,fastq.gz,fq.gz,fasta.gz,fa.gz,bam,mgc}" + - db: + type: directory + description: | + mOTUs database downloaded by `motus downloadDB` + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - out: + type: file + description: Results with taxonomic classification of each read + pattern: "*.out" + - bam: + type: file + description: Optional intermediate sorted BAM file from BWA + pattern: "*.{bam}" + - mgc: + type: file + description: Optional intermediate mgc read count table file saved with `-M`. + pattern: "*.{mgc}" + +authors: + - "@jianhong" diff --git a/nextflow.config b/nextflow.config index 411e7a6..6c39ccd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -116,6 +116,11 @@ params { // diamond run_diamond = false diamond_output_format = 'txt' + + // mOTUs + run_motus = false + download_motus_db = true + motus_downloaddb_script = 'https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py' } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 74fab27..198a937 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -409,6 +409,17 @@ "shortread_complexityfilter_fastp_threshold": { "type": "integer", "default": 30 + }, + "run_motus": { + "type": "boolean" + }, + "download_motus_db": { + "type": "boolean" + }, + "motus_downloaddb_script": { + "type": "string", + "default": "https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py", + "description": "mOTUs database download script path." 
} } } diff --git a/null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt b/null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt new file mode 100644 index 0000000..6b739ac --- /dev/null +++ b/null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index f3464d5..95eeefc 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -2,8 +2,9 @@ // Check input samplesheet and get read channels // -include { DATABASE_CHECK } from '../../modules/local/database_check' -include { UNTAR } from '../../modules/nf-core/modules/untar/main' +include { DATABASE_CHECK } from '../../modules/local/database_check' +include { UNTAR } from '../../modules/nf-core/modules/untar/main' +include { MOTUS_DOWNLOADDB } from '../../modules/nf-core/modules/motus/downloaddb/main' workflow DB_CHECK { take: @@ -20,6 +21,22 @@ workflow DB_CHECK { .splitCsv ( header:true, sep:',' ) .map { create_db_channels(it) } + // Download database for mOTUs + if( params.run_motus ){ + check_motus_db = + parsed_samplesheet.filter{ it[0].tool == "motus" } + .ifEmpty{[]} + if( params.download_motus_db ){ + MOTUS_DOWNLOADDB( params.motus_downloaddb_script ) + check_motus_db = MOTUS_DOWNLOADDB.out.db + .map{[ + [tool: "motus", db_name: "db_mOTU", db_params: ''], + it + ]} + } + parsed_samplesheet = parsed_samplesheet.mix(check_motus_db) + } + ch_dbs_for_untar = parsed_samplesheet .branch { untar: it[1].toString().endsWith(".tar.gz") diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 7fb3ce9..109598d 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -11,7 +11,7 @@ include { METAPHLAN3 } from '../../modules/nf-core/modules/meta include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main' include { 
KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main' include { DIAMOND_BLASTX } from '../../modules/nf-core/modules/diamond/blastx/main' - +include { MOTUS_PROFILE } from '../../modules/nf-core/modules/motus/profile/main' workflow PROFILING { take: @@ -44,6 +44,7 @@ workflow PROFILING { centrifuge: it[2]['tool'] == 'centrifuge' kaiju: it[2]['tool'] == 'kaiju' diamond: it[2]['tool'] == 'diamond' + motus: it[2]['tool'] == 'motus' unknown: true } @@ -186,9 +187,23 @@ workflow PROFILING { } + if ( params.run_motus ) { + + ch_input_for_motus = ch_input_for_profiling.motus + .multiMap { + it -> + reads: [it[0] + it[2], it[1]] + db: it[3] + } + + MOTUS_PROFILE ( ch_input_for_motus.reads, ch_input_for_motus.db ) + ch_versions = ch_versions.mix( MOTUS_PROFILE.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( MOTUS_PROFILE.out.out ) + + } + emit: profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } -