Merge pull request #82 from jianhong/motus

add motus profile.
2024-11-23 02:39:56 +00:00 · 2022-07-04 14:16:12 +02:00 · 2022-07-04 14:16:12 +02:00 · 1eba859bf8
commit 1eba859bf8
parent 913080a5b1 26e31c782d
14 changed files with 258 additions and 3 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -71,3 +71,54 @@ jobs:
        # Remember that you can parallelise this by using strategy.matrix
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
  # CI job that runs the pipeline with the dedicated mOTUs test profile.
  # The mOTUs database is downloaded inside the job and passed to the pipeline
  # through a database sheet generated on the fly.
  motus:
    name: Test mOTUs with workflow parameters
    # Run for every non-push event; for pushes, only in the nf-core/taxprofiler repository
    if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
    runs-on: ubuntu-latest
    env:
      # Key casing matches the matrix entries below and the "Install Nextflow" step
      NXF_VER: ${{ matrix.NXF_VER }}
      NXF_ANSI_LOG: false
    strategy:
      matrix:
        # Nextflow versions
        include:
          # Test pipeline minimum Nextflow version
          - NXF_VER: "21.10.3"
            NXF_EDGE: ""
          # Test latest edge release of Nextflow
          - NXF_VER: ""
            NXF_EDGE: "1"
    steps:
      - name: Check out pipeline code
        uses: actions/checkout@v2
      - name: Install Nextflow
        env:
          NXF_VER: ${{ matrix.NXF_VER }}
          # Uncomment only if the edge release is more recent than the latest stable release
          # See https://github.com/nextflow-io/nextflow/issues/2467
          # NXF_EDGE: ${{ matrix.NXF_EDGE }}
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      - name: Show current locale
        run: locale
      - name: Set UTF-8 enabled locale
        run: |
          sudo locale-gen en_US.UTF-8
          sudo update-locale LANG=en_US.UTF-8
      # Fetch the mOTUs downloader script, run it (output kept in a log file), then
      # write a one-row database sheet whose db_path is `db_mOTU`.
      # NOTE(review): `db_mOTU` is presumably the directory created by downloadDB.py
      # in the working directory -- confirm against the script.
      - name: Prepare the database
        run: |
          wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
          python downloadDB.py > download_db_log.txt
          echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
          echo 'motus,db_mOTU,,db_mOTU' >> 'database_motus.csv'
      - name: Run pipeline with test data
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv
--- a/conf/modules.config
+++ b/conf/modules.config
@ -365,6 +365,13 @@ process {
        ]
    }
    // mOTUs profiling results are published into one sub-directory per database name
    withName: MOTUS_PROFILE {
        publishDir = [path: { "${params.outdir}/motus/${meta.db_name}" }, mode: params.publish_dir_mode]
    }
    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
        publishDir = [
            path: { "${params.outdir}/pipeline_info" },
--- a/conf/test.config
+++ b/conf/test.config
@ -37,6 +37,7 @@ params {
    run_metaphlan3                        = true
    run_centrifuge                        = true
    run_diamond                           = true
    run_motus                             = false
    run_krona                             = true
    malt_save_reads                       = true
    kraken2_save_reads                    = true
--- a/conf/test_motus.config
+++ b/conf/test_motus.config
@ -0,0 +1,41 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test.
    Use as follows:
        nextflow run nf-core/taxprofiler -profile test_motus,<docker/singularity> --outdir <OUTDIR>
 ----------------------------------------------------------------------------------------
 */
 params {
    // Identity of this test profile
    config_profile_description = 'Minimal test to check mOTUs function'
    config_profile_name        = 'mOTUs Test profile'

    // Resource ceilings, kept small so the test can run on GitHub Actions
    max_time   = '6.h'
    max_memory = '6.GB'
    max_cpus   = 2

    // Inputs: the samplesheet comes from nf-core/test-datasets; the database sheet
    // is given as a relative path (the mOTUs CI job generates a file of this name)
    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    databases                             = 'database_motus.csv'
    hostremoval_reference                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'

    // Every preprocessing step is switched off
    perform_runmerging                    = false
    perform_longread_hostremoval          = false
    perform_shortread_hostremoval         = false
    perform_shortread_complexityfilter    = false
    perform_longread_clip                 = false
    perform_shortread_clipmerge           = false

    // Only mOTUs is enabled; all other profilers are disabled
    run_motus                             = true
    run_diamond                           = false
    run_centrifuge                        = false
    run_metaphlan3                        = false
    run_malt                              = false
    run_kraken2                           = false
    run_kaiju                             = false
 }
--- a/conf/test_nopreprocessing.config
+++ b/conf/test_nopreprocessing.config
@ -37,6 +37,7 @@ params {
    run_metaphlan3                        = true
    run_centrifuge                        = true
    run_diamond                           = true
    run_motus                             = false
    run_krona                             = true
 }
--- a/conf/test_noprofiling.config
+++ b/conf/test_noprofiling.config
@ -37,6 +37,7 @@ params {
    run_metaphlan3                        = false
    run_centrifuge                        = false
    run_diamond                           = false
    run_motus                             = false
 }
 process {
--- a/docs/usage.md
+++ b/docs/usage.md
@ -79,6 +79,7 @@ kraken2,db1,,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
 kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
 centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz
 metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/
 motus,db_mOTU,,/<path>/<to>/motus/motus_database/
 ```
 Column specifications are as follows:
@ -133,6 +134,13 @@ Expected (uncompressed) database files for each tool are as follows:
 - **DIAMOND** output of `diamond makedb`. Note: requires building with taxonomy files
  to generate taxonomic profile. See [DIAMOND documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options). A file named:
  - `<database_name>.dmnd`
 - **mOTUs** is composed of code and database together. The mOTUs tool
  [`downloadDB`](https://github.com/motu-tool/mOTUs/blob/master/motus/downloadDB.py)
  is used to prepare the mOTUs database and create a file with the version information.
  The database download step can be time consuming, and the database will be consistent
  with the release version of the mOTUs tools used to download it. A database can therefore
  be reused for multiple runs with the same version of the tools. Users can download the
  database once using the script above and specify the path to the database in the CSV table
  provided to `--databases`.
 ## Running the pipeline
--- a/modules.json
+++ b/modules.json
@ -72,6 +72,9 @@
            "minimap2/index": {
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
            "motus/profile": {
                "git_sha": "6b960f0e75bbb4d5bd301cd3875fa078d0eab4d1"
            },
            "multiqc": {
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
--- a/modules/nf-core/modules/motus/profile/main.nf
+++ b/modules/nf-core/modules/motus/profile/main.nf
@ -0,0 +1,54 @@
 // Runs `motus profile` on one sample's input (reads, or an intermediate BAM / mgc
 // file), optionally against a supplied database directory, and records the mOTUs
 // version in versions.yml.
 process MOTUS_PROFILE {
    tag "$meta.id"
    label 'process_medium'
    conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
    // Singularity image is used when the container engine is singularity and
    // task.ext.singularity_pull_docker_container is not set; otherwise the quay.io image.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
        'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"
    input:
    // meta: sample map (the script reads meta.id and meta.single_end)
    // reads: input file(s); the first file's extension selects the motus input flag
    tuple val(meta), path(reads)
    // db: mOTUs database path; when it evaluates to false no `-db` option is passed
    path db
    output:
    // `out` is mandatory; `bam` and `mgc` are only collected if such files exist
    tuple val(meta), path("*.out"), emit: out
    tuple val(meta), path("*.bam"), optional: true, emit: bam
    tuple val(meta), path("*.mgc"), optional: true, emit: mgc
    path "versions.yml"           , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    // Extra command-line arguments and output prefix come from task.ext, with
    // an empty string and the sample id as the respective fallbacks.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Input flag selection, in order: `.bam` -> -i, `.mgc` -> -m,
    // single-end -> -s, otherwise paired-end -> -f (first file) / -r (second file).
    def inputs = reads[0].getExtension() == 'bam' ?
                    "-i ${reads}" :
                    reads[0].getExtension() == 'mgc' ? "-m $reads" :
                        meta.single_end ?
                            "-s $reads" : "-f ${reads[0]} -r ${reads[1]}"
    // Only add `-db` when a database was provided
    def refdb = db ? "-db ${db}" : ""
    """
    motus profile \\
        $args \\
        $inputs \\
        $refdb \\
        -t $task.cpus \\
        -n $prefix \\
        -o ${prefix}.out
    ## mOTUs version number is not available from command line.
    ## mOTUs save the version number in index database folder.
    ## mOTUs will check the database version is same version as exec version.
    if [ "$db" == "" ]; then
        VERSION=\$(echo \$(motus -h 2>&1) | sed 's/^.*Version: //; s/References.*\$//')
    else
        VERSION=\$(grep motus $db/db_mOTU_versions | sed 's/motus\\t//g')
    fi
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mOTUs: \$VERSION
    END_VERSIONS
    """
 }
--- a/modules/nf-core/modules/motus/profile/meta.yml
+++ b/modules/nf-core/modules/motus/profile/meta.yml
@ -0,0 +1,61 @@
 # Module metadata for MOTUS_PROFILE (inputs, outputs and tool provenance).
 name: "motus_profile"
 description: Taxonomic meta-omics profiling using universal marker genes
 keywords:
  - classify
  - metagenomics
  - fastq
  - taxonomic profiling
 tools:
  - "motus":
      description: "Marker gene-based OTU (mOTU) profiling"
      homepage: "https://motu-tool.org/"
      documentation: "https://github.com/motu-tool/mOTUs/wiki"
      tool_dev_url: "https://github.com/motu-tool/mOTUs"
      doi: "10.1038/s41467-019-08844-4"
      licence: "['GPL v3']"
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
        respectively.
        Or the intermediate bam file mapped by bwa to the mOTUs database or
        the output bam file from motus profile.
        Or the intermediate mgc read counts table.
      # Alternatives inside the braces carry no leading dot: the `*.` prefix supplies it.
      pattern: "*.{fastq,fq,fasta,fa,fastq.gz,fq.gz,fasta.gz,fa.gz,bam,mgc}"
  - db:
      type: directory
      description: |
        mOTUs database downloaded by `motus downloadDB`
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - out:
      type: file
      description: Taxonomic profiling results written by `motus profile` (`-o`)
      pattern: "*.out"
  - bam:
      type: file
      description: Optional intermediate sorted BAM file from BWA
      pattern: "*.{bam}"
  - mgc:
      type: file
      description: Optional intermediate mgc read count table file saved with `-M`.
      pattern: "*.{mgc}"
 authors:
  - "@jianhong"
--- a/nextflow.config
+++ b/nextflow.config
@ -126,6 +126,9 @@ params {
    diamond_output_format      = 'tsv'  // TSV is only format with taxonomic information apparently
    diamond_save_reads         = false // this will override default diamond output format so no taxonomic profile is generated!
    // mOTUs
    run_motus                  = false
    // krona
    run_krona                  = false
 }
@ -200,6 +203,7 @@ profiles {
    test_full { includeConfig 'conf/test_full.config' }
    test_noprofiling { includeConfig 'conf/test_noprofiling.config' }
    test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' }
    test_motus { includeConfig 'conf/test_motus.config' }
 }
 // Load igenomes.config if required
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@ -421,6 +421,9 @@
            "type": "integer",
            "default": 500000000
        },
        "run_motus": {
            "type": "boolean"
        },
        "malt_save_reads": {
            "type": "boolean"
        },
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@ -11,7 +11,7 @@ include { METAPHLAN3                  } from '../../modules/nf-core/modules/meta
 include { KAIJU_KAIJU                 } from '../../modules/nf-core/modules/kaiju/kaiju/main'
 include { KAIJU_KAIJU2TABLE           } from '../../modules/nf-core/modules/kaiju/kaiju2table/main'
 include { DIAMOND_BLASTX              } from '../../modules/nf-core/modules/diamond/blastx/main'
-
+include { MOTUS_PROFILE               } from '../../modules/nf-core/modules/motus/profile/main'
 workflow PROFILING {
    take:
@ -45,6 +45,7 @@ workflow PROFILING {
                centrifuge: it[2]['tool'] == 'centrifuge'
                kaiju: it[2]['tool'] == 'kaiju'
                diamond: it[2]['tool'] == 'diamond'
                motus: it[2]['tool'] == 'motus'
                unknown: true
            }
@ -210,6 +211,25 @@ workflow PROFILING {
    }
    // mOTUs profiling, executed only when params.run_motus is set.
    if ( params.run_motus ) {
        // Take the entries branched to `motus`, drop FASTA samples (with a warning,
        // since mOTUs is reported here as not accepting FASTA input), then split each
        // remaining entry into two parallel channels:
        //   reads -> [ it[0] + it[2] (the two maps merged), it[1] ]
        //   db    -> it[3]
        // it[2] is the map whose 'tool' key drives the branching; it[1] and it[3] are
        // presumably the read files and the database path -- confirm against the caller.
        ch_input_for_motus = ch_input_for_profiling.motus
                                .filter{
                                    if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] mOTUs currently does not accept FASTA files as input. Skipping mOTUs for sample ${it[0].id}."
                                    !it[0].is_fasta
                                }
                                .multiMap {
                                    it ->
                                        reads: [it[0] + it[2], it[1]]
                                        db: it[3]
                                }
        // Run the module, then collect one versions file and all `out` outputs
        // into the shared versions / raw-profile channels.
        MOTUS_PROFILE ( ch_input_for_motus.reads, ch_input_for_motus.db )
        ch_versions        = ch_versions.mix( MOTUS_PROFILE.out.versions.first() )
        ch_raw_profiles    = ch_raw_profiles.mix( MOTUS_PROFILE.out.out )
    }
    emit:
    classifications = ch_raw_classifications
    profiles        = ch_raw_profiles    // channel: [ val(meta), [ reads ] ] - should be text files or biom