From 68d0cc00faaa50c45ec77a25158f3749d22d4f95 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Mon, 23 May 2022 08:05:06 -0400 Subject: [PATCH 01/12] add motus profile. --- conf/test.config | 1 + conf/test_nopreprocessing.config | 1 + conf/test_noprofiling.config | 1 + modules.json | 8 ++- .../nf-core/modules/motus/downloaddb/main.nf | 39 ++++++++++++ .../nf-core/modules/motus/downloaddb/meta.yml | 39 ++++++++++++ modules/nf-core/modules/motus/profile/main.nf | 54 ++++++++++++++++ .../nf-core/modules/motus/profile/meta.yml | 61 +++++++++++++++++++ nextflow.config | 5 ++ nextflow_schema.json | 11 ++++ .../execution_trace_2022-05-21_11-05-12.txt | 1 + subworkflows/local/db_check.nf | 21 ++++++- subworkflows/local/profiling.nf | 19 +++++- 13 files changed, 256 insertions(+), 5 deletions(-) create mode 100644 modules/nf-core/modules/motus/downloaddb/main.nf create mode 100644 modules/nf-core/modules/motus/downloaddb/meta.yml create mode 100644 modules/nf-core/modules/motus/profile/main.nf create mode 100644 modules/nf-core/modules/motus/profile/meta.yml create mode 100644 null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt diff --git a/conf/test.config b/conf/test.config index c687a86..cf983ab 100644 --- a/conf/test.config +++ b/conf/test.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_motus = true } process { diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index e8d4ed9..7658a2d 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_motus = true } process { diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index f908651..dffb44e 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = false run_centrifuge = false run_diamond = false + run_motus = false } process { 
diff --git a/modules.json b/modules.json index a55c88b..2fc7203 100644 --- a/modules.json +++ b/modules.json @@ -60,6 +60,12 @@ "minimap2/index": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "motus/downloaddb": { + "git_sha": "6393a085c5fcea11963774c041808df169907487" + }, + "motus/profile": { + "git_sha": "6b960f0e75bbb4d5bd301cd3875fa078d0eab4d1" + }, "multiqc": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -80,4 +86,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/motus/downloaddb/main.nf b/modules/nf-core/modules/motus/downloaddb/main.nf new file mode 100644 index 0000000..317624b --- /dev/null +++ b/modules/nf-core/modules/motus/downloaddb/main.nf @@ -0,0 +1,39 @@ +process MOTUS_DOWNLOADDB { + label 'process_low' + + conda (params.enable_conda ? "bioconda::motus=3.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0': + 'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }" + + input: + path motus_downloaddb_script + + output: + path "db_mOTU/" , emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def software = "${motus_downloaddb_script.simpleName}_copy.py" + """ + ## must copy script file to working directory, + ## otherwise the reference_db will be download to bin folder + ## other than current directory + cp $motus_downloaddb_script ${software} + python ${software} \\ + $args \\ + -t $task.cpus + + ## mOTUs version number is not available from command line. + ## mOTUs save the version number in index database folder. + ## mOTUs will check the database version is same version as exec version. 
+ cat <<-END_VERSIONS > versions.yml + "${task.process}": + mOTUs: \$(grep motus db_mOTU/db_mOTU_versions | sed 's/motus\\t//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/motus/downloaddb/meta.yml b/modules/nf-core/modules/motus/downloaddb/meta.yml new file mode 100644 index 0000000..64df5ee --- /dev/null +++ b/modules/nf-core/modules/motus/downloaddb/meta.yml @@ -0,0 +1,39 @@ +name: "motus_downloaddb" +description: Download the mOTUs database +keywords: + - classify + - metagenomics + - fastq + - taxonomic profiling + - database + - download +tools: + - "motus": + description: "The mOTU profiler is a computational tool that estimates relative taxonomic abundance of known and currently unknown microbial community members using metagenomic shotgun sequencing data." + homepage: "None" + documentation: "https://github.com/motu-tool/mOTUs/wiki" + tool_dev_url: "https://github.com/motu-tool/mOTUs" + doi: "10.1038/s41467-019-08844-4" + licence: "['GPL v3']" + +input: + - motus_downloaddb: + type: directory + description: | + The mOTUs downloadDB script source file. + It is the source file installed or + remote source in github such as https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py + pattern: "downloadDB.py" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - db: + type: directory + description: The mOTUs database directory + pattern: "db_mOTU" + +authors: + - "@jianhong" diff --git a/modules/nf-core/modules/motus/profile/main.nf b/modules/nf-core/modules/motus/profile/main.nf new file mode 100644 index 0000000..6a1acd3 --- /dev/null +++ b/modules/nf-core/modules/motus/profile/main.nf @@ -0,0 +1,54 @@ +process MOTUS_PROFILE { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::motus=3.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0': + 'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(reads) + path db + + output: + tuple val(meta), path("*.out"), emit: out + tuple val(meta), path("*.bam"), optional: true, emit: bam + tuple val(meta), path("*.mgc"), optional: true, emit: mgc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def inputs = reads[0].getExtension() == 'bam' ? + "-i ${reads}" : + reads[0].getExtension() == 'mgc' ? "-m $reads" : + meta.single_end ? + "-s $reads" : "-f ${reads[0]} -r ${reads[1]}" + def refdb = db ? "-db ${db}" : "" + """ + motus profile \\ + $args \\ + $inputs \\ + $refdb \\ + -t $task.cpus \\ + -n $prefix \\ + -o ${prefix}.out + + ## mOTUs version number is not available from command line. + ## mOTUs save the version number in index database folder. + ## mOTUs will check the database version is same version as exec version. 
+ if [ "$db" == "" ]; then + VERSION=\$(echo \$(motus -h 2>&1) | sed 's/^.*Version: //; s/References.*\$//') + else + VERSION=\$(grep motus $db/db_mOTU_versions | sed 's/motus\\t//g') + fi + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mOTUs: \$VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/motus/profile/meta.yml b/modules/nf-core/modules/motus/profile/meta.yml new file mode 100644 index 0000000..19803bd --- /dev/null +++ b/modules/nf-core/modules/motus/profile/meta.yml @@ -0,0 +1,61 @@ +name: "motus_profile" +description: Taxonomic meta-omics profiling using universal marker genes +keywords: + - classify + - metagenomics + - fastq + - taxonomic profiling +tools: + - "motus": + description: "Marker gene-based OTU (mOTU) profiling" + homepage: "https://motu-tool.org/" + documentation: "https://github.com/motu-tool/mOTUs/wiki" + tool_dev_url: "https://github.com/motu-tool/mOTUs" + doi: "10.1038/s41467-019-08844-4" + licence: "['GPL v3']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data, + respectively. + Or the intermediate bam file mapped by bwa to the mOTUs database or + the output bam file from motus profile. + Or the intermediate mgc read counts table. + pattern: "*.{fastq,fq,fasta,fa,fastq.gz,fq.gz,fasta.gz,fa.gz,.bam,.mgc}" + - db: + type: directory + description: | + mOTUs database downloaded by `motus downloadDB` + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - out: + type: file + description: Results with taxonomic classification of each read + pattern: "*.out" + - bam: + type: file + description: Optional intermediate sorted BAM file from BWA + pattern: "*.{bam}" + - mgc: + type: file + description: Optional intermediate mgc read count table file saved with `-M`. + pattern: "*.{mgc}" + +authors: + - "@jianhong" diff --git a/nextflow.config b/nextflow.config index 411e7a6..6c39ccd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -116,6 +116,11 @@ params { // diamond run_diamond = false diamond_output_format = 'txt' + + // mOTUs + run_motus = false + download_motus_db = true + motus_downloaddb_script = 'https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py' } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 74fab27..198a937 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -409,6 +409,17 @@ "shortread_complexityfilter_fastp_threshold": { "type": "integer", "default": 30 + }, + "run_motus": { + "type": "boolean" + }, + "download_motus_db": { + "type": "boolean" + }, + "motus_downloaddb_script": { + "type": "string", + "default": "https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py", + "description": "mOTUs database download script path." 
} } } diff --git a/null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt b/null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt new file mode 100644 index 0000000..6b739ac --- /dev/null +++ b/null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index f3464d5..95eeefc 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -2,8 +2,9 @@ // Check input samplesheet and get read channels // -include { DATABASE_CHECK } from '../../modules/local/database_check' -include { UNTAR } from '../../modules/nf-core/modules/untar/main' +include { DATABASE_CHECK } from '../../modules/local/database_check' +include { UNTAR } from '../../modules/nf-core/modules/untar/main' +include { MOTUS_DOWNLOADDB } from '../../modules/nf-core/modules/motus/downloaddb/main' workflow DB_CHECK { take: @@ -20,6 +21,22 @@ workflow DB_CHECK { .splitCsv ( header:true, sep:',' ) .map { create_db_channels(it) } + // Download database for mOTUs + if( params.run_motus ){ + check_motus_db = + parsed_samplesheet.filter{ it[0].tool == "motus" } + .ifEmpty{[]} + if( params.download_motus_db ){ + MOTUS_DOWNLOADDB( params.motus_downloaddb_script ) + check_motus_db = MOTUS_DOWNLOADDB.out.db + .map{[ + [tool: "motus", db_name: "db_mOTU", db_params: ''], + it + ]} + } + parsed_samplesheet = parsed_samplesheet.mix(check_motus_db) + } + ch_dbs_for_untar = parsed_samplesheet .branch { untar: it[1].toString().endsWith(".tar.gz") diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 7fb3ce9..109598d 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -11,7 +11,7 @@ include { METAPHLAN3 } from '../../modules/nf-core/modules/meta include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main' include { 
KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main' include { DIAMOND_BLASTX } from '../../modules/nf-core/modules/diamond/blastx/main' - +include { MOTUS_PROFILE } from '../../modules/nf-core/modules/motus/profile/main' workflow PROFILING { take: @@ -44,6 +44,7 @@ workflow PROFILING { centrifuge: it[2]['tool'] == 'centrifuge' kaiju: it[2]['tool'] == 'kaiju' diamond: it[2]['tool'] == 'diamond' + motus: it[2]['tool'] == 'motus' unknown: true } @@ -186,9 +187,23 @@ workflow PROFILING { } + if ( params.run_motus ) { + + ch_input_for_motus = ch_input_for_profiling.motus + .multiMap { + it -> + reads: [it[0] + it[2], it[1]] + db: it[3] + } + + MOTUS_PROFILE ( ch_input_for_motus.reads, ch_input_for_motus.db ) + ch_versions = ch_versions.mix( MOTUS_PROFILE.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( MOTUS_PROFILE.out.out ) + + } + emit: profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } - From 4b3216c1cb494c8110c361df25524f7515cc531e Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Tue, 24 May 2022 08:42:18 -0400 Subject: [PATCH 02/12] filter fasta inputs for mOTUs; fix the typo for nopreprocessing in nextflow.config. 
--- nextflow.config | 2 +- subworkflows/local/profiling.nf | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 6c39ccd..a8240d1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -192,7 +192,7 @@ profiles { test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } test_noprofiling { includeConfig 'conf/test_noprofiling.config' } - test_nopreprocessing { includeConfig 'conf/test_preprocessing.config' } + test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' } } // Load igenomes.config if required diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 109598d..2813bc7 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -190,6 +190,10 @@ workflow PROFILING { if ( params.run_motus ) { ch_input_for_motus = ch_input_for_profiling.motus + .filter{ + if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] mOTUs currently does not accept FASTA files as input. Skipping mOTUs for sample ${it[0].id}." 
+ !it[0].is_fasta + } .multiMap { it -> reads: [it[0] + it[2], it[1]] From f14141574e15b83165507c2223e17fd927d70018 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Wed, 25 May 2022 08:03:23 -0400 Subject: [PATCH 03/12] fix the end line issue for modules.json --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 2fc7203..d4172ae 100644 --- a/modules.json +++ b/modules.json @@ -86,4 +86,4 @@ } } } -} \ No newline at end of file +} From e916c91de3306518fab7f5b3d1773e7cbe86aea8 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Tue, 31 May 2022 07:57:15 -0400 Subject: [PATCH 04/12] Delete null/pipeline_info directory --- null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt diff --git a/null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt b/null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt deleted file mode 100644 index 6b739ac..0000000 --- a/null/pipeline_info/execution_trace_2022-05-21_11-05-12.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar From 4d77b47e8d3400c6f348afcf8a2c8c653d1fd8e4 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Tue, 31 May 2022 08:47:38 -0400 Subject: [PATCH 05/12] create a new test profile config for mOTUs. 
--- .github/workflows/ci.yml | 4 +++- conf/test.config | 2 +- conf/test_motus.config | 41 ++++++++++++++++++++++++++++++++ conf/test_nopreprocessing.config | 2 +- docs/usage.md | 9 +++++++ nextflow.config | 1 + 6 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 conf/test_motus.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 689a193..01c5535 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,6 +42,8 @@ jobs: - "--perform_runmerging" - "--perform_runmerging --shortread_clipmerge_mergepairs" - "--shortread_complexityfilter false --perform_shortread_hostremoval" + # Test different profiles + profile: ["test", "test_motus"] steps: - name: Check out pipeline code @@ -70,4 +72,4 @@ jobs: # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }} + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results ${{ matrix.parameters }} diff --git a/conf/test.config b/conf/test.config index cf983ab..3a6d265 100644 --- a/conf/test.config +++ b/conf/test.config @@ -37,7 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true - run_motus = true + run_motus = false } process { diff --git a/conf/test_motus.config b/conf/test_motus.config new file mode 100644 index 0000000..c645154 --- /dev/null +++ b/conf/test_motus.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/taxprofiler -profile test_motus,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'mOTUs Test profile' + config_profile_description = 'Minimal test to check mOTUs function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' + perform_shortread_clipmerge = false + perform_longread_clip = false + perform_shortread_complexityfilter = false + perform_shortread_hostremoval = false + perform_longread_hostremoval = false + perform_runmerging = false + hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + run_kaiju = false + run_kraken2 = false + run_malt = false + run_metaphlan3 = false + run_centrifuge = false + run_diamond = false + run_motus = true +} diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 7658a2d..e52319f 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -37,7 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true - run_motus = true + run_motus = false } process { diff --git a/docs/usage.md b/docs/usage.md index 54ffce0..3172b30 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -77,6 +77,7 @@ kraken2,db1,,/<path>/<to>/kraken2/testdb-kraken2.tar.gz kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/ 
+motus,db_mOTU,,/<path>/<to>/motus/motus_database/ ``` Column specifications are as follows: @@ -131,6 +132,14 @@ Expected (uncompressed) database files for each tool are as follows: - **DIAMOND** output of `diamond makedb`. Note: requires building with taxonomy files to generate taxonomic profile. See [DIAMOND documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options). A file named: - `.dmnd` +- **mOTUs** ships its code and database together. The mOTUs script + [`downloadDB`](https://github.com/motu-tool/mOTUs/blob/master/motus/downloadDB.py) + prepares the mOTUs database and creates a file with the version information. + The database download step can be time consuming and the database will be consisting + with same release version of the mOTUs tools. The database for same version tools + can be thus reused for multiple runs. Users can download the database once and + assign the database with the table. User can also set the parameter + `download_motus_db` and let the pipeline download the database automatically. 
## Running the pipeline diff --git a/nextflow.config b/nextflow.config index 524c249..98e6cc5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -193,6 +193,7 @@ profiles { test_full { includeConfig 'conf/test_full.config' } test_noprofiling { includeConfig 'conf/test_noprofiling.config' } test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' } + test_motus { includeConfig 'conf/test_motus.config' } } // Load igenomes.config if required From f0eb88cbe265e8f4a83577bcba10b614659605f8 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Tue, 31 May 2022 08:57:00 -0400 Subject: [PATCH 06/12] add mOTUs to modules.config --- conf/modules.config | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 5d8398e..c2a3d85 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -337,6 +337,20 @@ process { ] } + withName: MOTUS_DOWNLOADDB { + publishDir = [ + path: { "${params.outdir}/motus/db/${meta.db_name}" }, + mode: params.publish_dir_mode + ] + } + + withName: MOTUS_PROFILE { + publishDir = [ + path: { "${params.outdir}/motus/${meta.db_name}" }, + mode: params.publish_dir_mode + ] + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, From c8a59adbceb8790c02f49a31e95a89347ed2fc04 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Fri, 3 Jun 2022 14:13:58 -0400 Subject: [PATCH 07/12] remove the database auto-download for mOTUs. 
--- .github/workflows/ci.yml | 54 +++++++++++++++++-- conf/modules.config | 7 --- conf/test_motus.config | 2 +- docs/usage.md | 3 +- modules.json | 3 -- .../nf-core/modules/motus/downloaddb/main.nf | 39 -------------- .../nf-core/modules/motus/downloaddb/meta.yml | 39 -------------- nextflow.config | 2 - nextflow_schema.json | 8 --- subworkflows/local/db_check.nf | 17 ------ 10 files changed, 53 insertions(+), 121 deletions(-) delete mode 100644 modules/nf-core/modules/motus/downloaddb/main.nf delete mode 100644 modules/nf-core/modules/motus/downloaddb/meta.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 16a39a6..0088181 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,8 +42,6 @@ jobs: - "--perform_runmerging" - "--perform_runmerging --shortread_qc_mergepairs" - "--shortread_complexityfilter false --perform_shortread_hostremoval" - # Test different profiles - profile: ["test", "test_motus"] steps: - name: Check out pipeline code @@ -72,4 +70,54 @@ jobs: # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results ${{ matrix.parameters }} + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }} + + motus: + name: Test mOTUs with workflow parameters + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + # Nextflow versions + include: + # Test pipeline minimum Nextflow version + - NXF_VER: "21.10.3" + NXF_EDGE: "" + # Test latest edge release of Nextflow + - NXF_VER: "" + NXF_EDGE: "1" + + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + NXF_VER: ${{ matrix.NXF_VER }} + # 
Uncomment only if the edge release is more recent than the latest stable release + # See https://github.com/nextflow-io/nextflow/issues/2467 + # NXF_EDGE: ${{ matrix.NXF_EDGE }} + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Show current locale + run: locale + + - name: Set UTF-8 enabled locale + run: | + sudo locale-gen en_US.UTF-8 + sudo update-locale LANG=en_US.UTF-8 + + - name: Prepare the database + run: | + wget https://github.com/motu-tool/mOTUs/blob/master/motus/downloadDB.py + python downloadDB.py + echo 'tool,db_name,db_params,db_path\nmotus,db_mOTU,,db_mOTU' > 'database_motus.csv' + + - name: Run pipeline with test data + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv diff --git a/conf/modules.config b/conf/modules.config index 1e7c260..66967e2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -353,13 +353,6 @@ process { ] } - withName: MOTUS_DOWNLOADDB { - publishDir = [ - path: { "${params.outdir}/motus/db/${meta.db_name}" }, - mode: params.publish_dir_mode - ] - } - withName: MOTUS_PROFILE { publishDir = [ path: { "${params.outdir}/motus/${meta.db_name}" }, diff --git a/conf/test_motus.config b/conf/test_motus.config index c645154..9d39ad4 100644 --- a/conf/test_motus.config +++ b/conf/test_motus.config @@ -23,7 +23,7 @@ params { // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' + databases = 'database_motus.csv' perform_shortread_clipmerge = false perform_longread_clip = false perform_shortread_complexityfilter = false diff --git a/docs/usage.md b/docs/usage.md index b43ae12..ea21e54 100644 --- a/docs/usage.md 
+++ b/docs/usage.md @@ -138,8 +138,7 @@ Expected (uncompressed) database files for each tool are as follows: The database download step can be time consuming and the database will be consisting with same release version of the mOTUs tools. The database for same version tools can be thus reused for multiple runs. Users can download the database once and - assign the database with the table. User can also set the parameter - `download_motus_db` and let the pipeline download the database automatically. + assign the database with the table. ## Running the pipeline diff --git a/modules.json b/modules.json index 3e4dd9b..758ac22 100644 --- a/modules.json +++ b/modules.json @@ -63,9 +63,6 @@ "minimap2/index": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, - "motus/downloaddb": { - "git_sha": "6393a085c5fcea11963774c041808df169907487" - }, "motus/profile": { "git_sha": "6b960f0e75bbb4d5bd301cd3875fa078d0eab4d1" }, diff --git a/modules/nf-core/modules/motus/downloaddb/main.nf b/modules/nf-core/modules/motus/downloaddb/main.nf deleted file mode 100644 index 317624b..0000000 --- a/modules/nf-core/modules/motus/downloaddb/main.nf +++ /dev/null @@ -1,39 +0,0 @@ -process MOTUS_DOWNLOADDB { - label 'process_low' - - conda (params.enable_conda ? "bioconda::motus=3.0.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0': - 'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }" - - input: - path motus_downloaddb_script - - output: - path "db_mOTU/" , emit: db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def software = "${motus_downloaddb_script.simpleName}_copy.py" - """ - ## must copy script file to working directory, - ## otherwise the reference_db will be download to bin folder - ## other than current directory - cp $motus_downloaddb_script ${software} - python ${software} \\ - $args \\ - -t $task.cpus - - ## mOTUs version number is not available from command line. - ## mOTUs save the version number in index database folder. - ## mOTUs will check the database version is same version as exec version. - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mOTUs: \$(grep motus db_mOTU/db_mOTU_versions | sed 's/motus\\t//g') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/motus/downloaddb/meta.yml b/modules/nf-core/modules/motus/downloaddb/meta.yml deleted file mode 100644 index 64df5ee..0000000 --- a/modules/nf-core/modules/motus/downloaddb/meta.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: "motus_downloaddb" -description: Download the mOTUs database -keywords: - - classify - - metagenomics - - fastq - - taxonomic profiling - - database - - download -tools: - - "motus": - description: "The mOTU profiler is a computational tool that estimates relative taxonomic abundance of known and currently unknown microbial community members using metagenomic shotgun sequencing data." - homepage: "None" - documentation: "https://github.com/motu-tool/mOTUs/wiki" - tool_dev_url: "https://github.com/motu-tool/mOTUs" - doi: "10.1038/s41467-019-08844-4" - licence: "['GPL v3']" - -input: - - motus_downloaddb: - type: directory - description: | - The mOTUs downloadDB script source file. 
- It is the source file installed or - remote source in github such as https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py - pattern: "downloadDB.py" - -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - db: - type: directory - description: The mOTUs database directory - pattern: "db_mOTU" - -authors: - - "@jianhong" diff --git a/nextflow.config b/nextflow.config index 0ddd5c1..7f9f4c1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -126,8 +126,6 @@ params { // mOTUs run_motus = false - download_motus_db = true - motus_downloaddb_script = 'https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py' } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index fc0d9e3..b380b84 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -430,14 +430,6 @@ }, "run_motus": { "type": "boolean" - }, - "download_motus_db": { - "type": "boolean" - }, - "motus_downloaddb_script": { - "type": "string", - "default": "https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py", - "description": "mOTUs database download script path." 
} } } diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index 95eeefc..7b440c6 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -4,7 +4,6 @@ include { DATABASE_CHECK } from '../../modules/local/database_check' include { UNTAR } from '../../modules/nf-core/modules/untar/main' -include { MOTUS_DOWNLOADDB } from '../../modules/nf-core/modules/motus/downloaddb/main' workflow DB_CHECK { take: @@ -21,22 +20,6 @@ workflow DB_CHECK { .splitCsv ( header:true, sep:',' ) .map { create_db_channels(it) } - // Download database for mOTUs - if( params.run_motus ){ - check_motus_db = - parsed_samplesheet.filter{ it[0].tool == "motus" } - .ifEmpty{[]} - if( params.download_motus_db ){ - MOTUS_DOWNLOADDB( params.motus_downloaddb_script ) - check_motus_db = MOTUS_DOWNLOADDB.out.db - .map{[ - [tool: "motus", db_name: "db_mOTU", db_params: ''], - it - ]} - } - parsed_samplesheet = parsed_samplesheet.mix(check_motus_db) - } - ch_dbs_for_untar = parsed_samplesheet .branch { untar: it[1].toString().endsWith(".tar.gz") From 22f65eedc7054363d84f029851c17efa78567a15 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Fri, 3 Jun 2022 14:16:25 -0400 Subject: [PATCH 08/12] fix the download address for downloadDB for mOTUs. 
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0088181..61911a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -114,7 +114,7 @@ jobs: - name: Prepare the database run: | - wget https://github.com/motu-tool/mOTUs/blob/master/motus/downloadDB.py + wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py python downloadDB.py echo 'tool,db_name,db_params,db_path\nmotus,db_mOTU,,db_mOTU' > 'database_motus.csv' From 3cd3d2dad365558df2a71d7afc19820304e8c2f4 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Fri, 3 Jun 2022 14:38:59 -0400 Subject: [PATCH 09/12] prettier write . --- .github/workflows/ci.yml | 84 ++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 61911a8..c018947 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -73,51 +73,51 @@ jobs: nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }} motus: - name: Test mOTUs with workflow parameters - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }} - runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false - strategy: - matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - NXF_EDGE: "" - # Test latest edge release of Nextflow - - NXF_VER: "" - NXF_EDGE: "1" + name: Test mOTUs with workflow parameters + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }} + runs-on: ubuntu-latest + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + strategy: + matrix: + # Nextflow versions + include: + # Test pipeline minimum Nextflow version + - NXF_VER: "21.10.3" + NXF_EDGE: "" + # Test 
latest edge release of Nextflow + - NXF_VER: "" + NXF_EDGE: "1" - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 - - name: Install Nextflow - env: - NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + - name: Install Nextflow + env: + NXF_VER: ${{ matrix.NXF_VER }} + # Uncomment only if the edge release is more recent than the latest stable release + # See https://github.com/nextflow-io/nextflow/issues/2467 + # NXF_EDGE: ${{ matrix.NXF_EDGE }} + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ - - name: Show current locale - run: locale + - name: Show current locale + run: locale - - name: Set UTF-8 enabled locale - run: | - sudo locale-gen en_US.UTF-8 - sudo update-locale LANG=en_US.UTF-8 + - name: Set UTF-8 enabled locale + run: | + sudo locale-gen en_US.UTF-8 + sudo update-locale LANG=en_US.UTF-8 - - name: Prepare the database - run: | - wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py - python downloadDB.py - echo 'tool,db_name,db_params,db_path\nmotus,db_mOTU,,db_mOTU' > 'database_motus.csv' + - name: Prepare the database + run: | + wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py + python downloadDB.py > download_db_log.txt + echo 'tool,db_name,db_params,db_path\nmotus,db_mOTU,,db_mOTU' > 'database_motus.csv' - - name: Run pipeline with test data - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv + - name: Run pipeline with test data + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv From 
9fa6ecad56a11e85c02b547998445e3311618298 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Fri, 3 Jun 2022 14:50:03 -0400 Subject: [PATCH 10/12] debug for database csv file --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c018947..96f507c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -116,7 +116,11 @@ jobs: run: | wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py python downloadDB.py > download_db_log.txt - echo 'tool,db_name,db_params,db_path\nmotus,db_mOTU,,db_mOTU' > 'database_motus.csv' + echo 'tool,db_name,db_params,db_path' > 'database_motus.csv' + echo 'motus,db_mOTU,,db_mOTU' >> 'database_motus.csv' + cat database_motus.csv + echo $PWD + ls $PWD - name: Run pipeline with test data run: | From 713a341e097cb7313627b95af88bfc05b03fd759 Mon Sep 17 00:00:00 2001 From: JIANHONG OU Date: Fri, 3 Jun 2022 15:22:33 -0400 Subject: [PATCH 11/12] clean up ci.yml --- .github/workflows/ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 96f507c..9bdd7fb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -118,9 +118,6 @@ jobs: python downloadDB.py > download_db_log.txt echo 'tool,db_name,db_params,db_path' > 'database_motus.csv' echo 'motus,db_mOTU,,db_mOTU' >> 'database_motus.csv' - cat database_motus.csv - echo $PWD - ls $PWD - name: Run pipeline with test data run: | From dc48935651e5a32fabbd7e095cd034cdfec625db Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 7 Jun 2022 20:19:45 +0200 Subject: [PATCH 12/12] Apply suggestions from code review --- docs/usage.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index ea21e54..4090cf2 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -137,8 +137,8 @@ Expected (uncompressed) database files for each tool are as follows: is used to prepare the mOTUs database and create a file with the version information. The database download step can be time consuming and the database will be consisting with same release version of the mOTUs tools. The database for same version tools - can be thus reused for multiple runs. Users can download the database once and - assign the database with the table. + can be thus reused for multiple runs. Users can download the database once using the script above and + specify the path to the database in the CSV table provided to `--databases`. ## Running the pipeline