Merge pull request #82 from jianhong/motus

add motus profile.
2024-11-26 02:09:54 +00:00 · 2022-07-04 14:16:12 +02:00 · 2022-07-04 14:16:12 +02:00 · 1eba859bf8
commit 1eba859bf8
parent 913080a5b1 26e31c782d
14 changed files with 258 additions and 3 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -71,3 +71,54 @@ jobs:
        # Remember that you can parallelise this by using strategy.matrix
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
+
+  motus:
+    name: Test mOTUs with workflow parameters
+    if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
+    runs-on: ubuntu-latest
+    env:
+      NXF_VER: ${{ matrix.nxf_ver }}
+      NXF_ANSI_LOG: false
+    strategy:
+      matrix:
+        # Nextflow versions
+        include:
+          # Test pipeline minimum Nextflow version
+          - NXF_VER: "21.10.3"
+            NXF_EDGE: ""
+          # Test latest edge release of Nextflow
+          - NXF_VER: ""
+            NXF_EDGE: "1"
+
+    steps:
+      - name: Check out pipeline code
+        uses: actions/checkout@v2
+
+      - name: Install Nextflow
+        env:
+          NXF_VER: ${{ matrix.NXF_VER }}
+          # Uncomment only if the edge release is more recent than the latest stable release
+          # See https://github.com/nextflow-io/nextflow/issues/2467
+          # NXF_EDGE: ${{ matrix.NXF_EDGE }}
+        run: |
+          wget -qO- get.nextflow.io | bash
+          sudo mv nextflow /usr/local/bin/
+
+      - name: Show current locale
+        run: locale
+
+      - name: Set UTF-8 enabled locale
+        run: |
+          sudo locale-gen en_US.UTF-8
+          sudo update-locale LANG=en_US.UTF-8
+
+      - name: Prepare the database
+        run: |
+          wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
+          python downloadDB.py > download_db_log.txt
+          echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
+          echo 'motus,db_mOTU,,db_mOTU' >> 'database_motus.csv'
+
+      - name: Run pipeline with test data
+        run: |
+          nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv
--- a/conf/modules.config
+++ b/conf/modules.config
@ -365,6 +365,13 @@ process {
        ]
    }

+    withName: MOTUS_PROFILE {
+        publishDir = [
+            path: { "${params.outdir}/motus/${meta.db_name}" },
+            mode: params.publish_dir_mode
+        ]
+    }
+
    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
        publishDir = [
            path: { "${params.outdir}/pipeline_info" },
--- a/conf/test.config
+++ b/conf/test.config
@ -37,6 +37,7 @@ params {
    run_metaphlan3                        = true
    run_centrifuge                        = true
    run_diamond                           = true
+    run_motus                             = false
    run_krona                             = true
    malt_save_reads                       = true
    kraken2_save_reads                    = true
--- a/conf/test_motus.config
+++ b/conf/test_motus.config
@ -0,0 +1,41 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/taxprofiler -profile test_motus,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'mOTUs Test profile'
+    config_profile_description = 'Minimal test to check mOTUs function'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
+    // TODO nf-core: Give any required params for the test so that command line flags are not needed
+    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
+    databases                             = 'database_motus.csv'
+    perform_shortread_clipmerge           = false
+    perform_longread_clip                 = false
+    perform_shortread_complexityfilter    = false
+    perform_shortread_hostremoval         = false
+    perform_longread_hostremoval          = false
+    perform_runmerging                    = false
+    hostremoval_reference                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+    run_kaiju                             = false
+    run_kraken2                           = false
+    run_malt                              = false
+    run_metaphlan3                        = false
+    run_centrifuge                        = false
+    run_diamond                           = false
+    run_motus                             = true
+}
--- a/conf/test_nopreprocessing.config
+++ b/conf/test_nopreprocessing.config
@ -37,6 +37,7 @@ params {
    run_metaphlan3                        = true
    run_centrifuge                        = true
    run_diamond                           = true
+    run_motus                             = false
    run_krona                             = true
 }

--- a/conf/test_noprofiling.config
+++ b/conf/test_noprofiling.config
@ -37,6 +37,7 @@ params {
    run_metaphlan3                        = false
    run_centrifuge                        = false
    run_diamond                           = false
+    run_motus                             = false
 }

 process {
--- a/docs/usage.md
+++ b/docs/usage.md
@ -79,6 +79,7 @@ kraken2,db1,,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
 kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
 centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz
 metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/
+motus,db_mOTU,,/<path>/<to>/motus/motus_database/
 ```

 Column specifications are as follows:
@ -133,6 +134,13 @@ Expected (uncompressed) database files for each tool are as follows:
 - **DIAMOND** output of `diamond makedb`. Note: requires building with taxonomy files
  to generate taxonomic profile. See [DIAMOND documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options). A file named:
  - `<database_name>.dmnd`
+- **mOTUs** bundles its code and database together. The mOTUs script
+  [`downloadDB`](https://github.com/motu-tool/mOTUs/blob/master/motus/downloadDB.py)
+  prepares the mOTUs database and creates a file with the version information.
+  Downloading the database can be time consuming, and the downloaded database matches
+  the release version of the mOTUs tools. A database can therefore be reused across
+  multiple runs with the same tool version. Users can download the database once using the
+  script above and specify the path to the database in the CSV table provided to `--databases`.

 ## Running the pipeline

--- a/modules.json
+++ b/modules.json
@ -72,6 +72,9 @@
            "minimap2/index": {
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
+            "motus/profile": {
+                "git_sha": "6b960f0e75bbb4d5bd301cd3875fa078d0eab4d1"
+            },
            "multiqc": {
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
--- a/modules/nf-core/modules/motus/profile/main.nf
+++ b/modules/nf-core/modules/motus/profile/main.nf
@ -0,0 +1,54 @@
+process MOTUS_PROFILE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
+        'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    path db
+
+    output:
+    tuple val(meta), path("*.out"), emit: out
+    tuple val(meta), path("*.bam"), optional: true, emit: bam
+    tuple val(meta), path("*.mgc"), optional: true, emit: mgc
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def inputs = reads[0].getExtension() == 'bam' ?
+                    "-i ${reads}" :
+                    reads[0].getExtension() == 'mgc' ? "-m $reads" :
+                        meta.single_end ?
+                            "-s $reads" : "-f ${reads[0]} -r ${reads[1]}"
+    def refdb = db ? "-db ${db}" : ""
+    """
+    motus profile \\
+        $args \\
+        $inputs \\
+        $refdb \\
+        -t $task.cpus \\
+        -n $prefix \\
+        -o ${prefix}.out
+
+    ## mOTUs version number is not available from command line.
+    ## mOTUs save the version number in index database folder.
+    ## mOTUs will check the database version is same version as exec version.
+    if [ "$db" == "" ]; then
+        VERSION=\$(echo \$(motus -h 2>&1) | sed 's/^.*Version: //; s/References.*\$//')
+    else
+        VERSION=\$(grep motus $db/db_mOTU_versions | sed 's/motus\\t//g')
+    fi
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mOTUs: \$VERSION
+    END_VERSIONS
+    """
+}
--- a/modules/nf-core/modules/motus/profile/meta.yml
+++ b/modules/nf-core/modules/motus/profile/meta.yml
@ -0,0 +1,61 @@
+name: "motus_profile"
+description: Taxonomic meta-omics profiling using universal marker genes
+keywords:
+  - classify
+  - metagenomics
+  - fastq
+  - taxonomic profiling
+tools:
+  - "motus":
+      description: "Marker gene-based OTU (mOTU) profiling"
+      homepage: "https://motu-tool.org/"
+      documentation: "https://github.com/motu-tool/mOTUs/wiki"
+      tool_dev_url: "https://github.com/motu-tool/mOTUs"
+      doi: "10.1038/s41467-019-08844-4"
+      licence: "['GPL v3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+        Or the intermediate bam file mapped by bwa to the mOTUs database or
+        the output bam file from motus profile.
+        Or the intermediate mgc read counts table.
+      pattern: "*.{fastq,fq,fasta,fa,fastq.gz,fq.gz,fasta.gz,fa.gz,bam,mgc}"
+  - db:
+      type: directory
+      description: |
+        mOTUs database downloaded by `motus downloadDB`
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - out:
+      type: file
+      description: Taxonomic profile produced by `motus profile` (written with `-o`)
+      pattern: "*.out"
+  - bam:
+      type: file
+      description: Optional intermediate sorted BAM file from BWA
+      pattern: "*.{bam}"
+  - mgc:
+      type: file
+      description: Optional intermediate mgc read count table file saved with `-M`.
+      pattern: "*.{mgc}"
+
+authors:
+  - "@jianhong"
--- a/nextflow.config
+++ b/nextflow.config
@ -126,6 +126,9 @@ params {
    diamond_output_format      = 'tsv'  // TSV is only format with taxonomic information apparently
    diamond_save_reads         = false // this will override default diamond output format so no taxonomic profile is generated!

+    // mOTUs
+    run_motus                  = false
+
    // krona
    run_krona                  = false
 }
@ -200,6 +203,7 @@ profiles {
    test_full { includeConfig 'conf/test_full.config' }
    test_noprofiling { includeConfig 'conf/test_noprofiling.config' }
    test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' }
+    test_motus { includeConfig 'conf/test_motus.config' }
 }

 // Load igenomes.config if required
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@ -421,6 +421,9 @@
            "type": "integer",
            "default": 500000000
        },
+        "run_motus": {
+            "type": "boolean"
+        },
        "malt_save_reads": {
            "type": "boolean"
        },
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@ -11,7 +11,7 @@ include { METAPHLAN3                  } from '../../modules/nf-core/modules/meta
 include { KAIJU_KAIJU                 } from '../../modules/nf-core/modules/kaiju/kaiju/main'
 include { KAIJU_KAIJU2TABLE           } from '../../modules/nf-core/modules/kaiju/kaiju2table/main'
 include { DIAMOND_BLASTX              } from '../../modules/nf-core/modules/diamond/blastx/main'
-
+include { MOTUS_PROFILE               } from '../../modules/nf-core/modules/motus/profile/main'

 workflow PROFILING {
    take:
@ -45,6 +45,7 @@ workflow PROFILING {
                centrifuge: it[2]['tool'] == 'centrifuge'
                kaiju: it[2]['tool'] == 'kaiju'
                diamond: it[2]['tool'] == 'diamond'
+                motus: it[2]['tool'] == 'motus'
                unknown: true
            }

@ -210,6 +211,25 @@ workflow PROFILING {

    }

+    if ( params.run_motus ) {
+
+        ch_input_for_motus = ch_input_for_profiling.motus
+                                .filter{
+                                    if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] mOTUs currently does not accept FASTA files as input. Skipping mOTUs for sample ${it[0].id}."
+                                    !it[0].is_fasta
+                                }
+                                .multiMap {
+                                    it ->
+                                        reads: [it[0] + it[2], it[1]]
+                                        db: it[3]
+                                }
+
+        MOTUS_PROFILE ( ch_input_for_motus.reads, ch_input_for_motus.db )
+        ch_versions        = ch_versions.mix( MOTUS_PROFILE.out.versions.first() )
+        ch_raw_profiles    = ch_raw_profiles.mix( MOTUS_PROFILE.out.out )
+
+    }
+
    emit:
    classifications = ch_raw_classifications
    profiles        = ch_raw_profiles    // channel: [ val(meta), [ reads ] ] - should be text files or biom