Mirror of https://github.com/MillironX/taxprofiler.git

Add motus/merge and biom support

James Fellows Yates 2022-07-12 11:39:26 +02:00
parent 8082c7d108
commit a0ee82bf43
12 changed files with 241 additions and 13 deletions

View file

@@ -391,12 +391,20 @@ process {
     }
     withName: MOTUS_PROFILE {
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/motus/${meta.db_name}" },
             mode: params.publish_dir_mode
         ]
     }
+    withName: MOTUS_MERGE {
+        publishDir = [
+            path: { "${params.outdir}/motus/" },
+            mode: params.publish_dir_mode
+        ]
+    }
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },

View file

@@ -38,4 +38,5 @@ params {
     run_centrifuge = false
     run_diamond = false
     run_motus = true
+    run_profile_standardisation = true
 }

View file

@ -78,8 +78,11 @@
"minimap2/index": { "minimap2/index": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
}, },
"motus/merge": {
"git_sha": "b02e648c221e1da17cb589eefe297e61ec9e9c49"
},
"motus/profile": { "motus/profile": {
"git_sha": "6b960f0e75bbb4d5bd301cd3875fa078d0eab4d1" "git_sha": "b02e648c221e1da17cb589eefe297e61ec9e9c49"
}, },
"multiqc": { "multiqc": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"

View file

@ -0,0 +1,47 @@
VERSION = '3.0.1'
process MOTUS_MERGE {
label 'process_low'
conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"
input:
path input
path db // to stop docker saying it can't find it... would have to have the module in upstream steps anyway
path profile_version_yml, stageAs: 'profile_version.yml'
val biom_format
output:
path("*.txt") , optional: true, emit: txt
path("*.biom"), optional: true, emit: biom
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = 'motus_merged'
def cmd_input = input.size() > 1 ? "-i ${input.join(',')}" : input.isDirectory() ? "-d ${input}" : "-i ${input}"
def output = biom_format ? "-B -o ${prefix}.biom" : "-o ${prefix}.txt"
"""
motus \\
merge \\
-db $db \\
${cmd_input} \\
$args \\
${output}
## Take version from the mOTUs/profile module output, as cannot reconstruct
## version without having database staged in this directory.
VERSION=\$(cat ${profile_version_yml} | grep '/*motus:.*' | sed 's/.*otus: //g')
cat <<-END_VERSIONS > versions.yml
"${task.process}":
motus: \$VERSION
END_VERSIONS
"""
}
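The cmd_input ternary above switches between list input (-i) and directory input (-d) for motus merge. A plain-Groovy sketch of the flag construction, using hypothetical file names and omitting the -db flag for brevity:

    def files = ['sample1.out', 'sample2.out']
    def cmd_input = files.size() > 1 ? "-i ${files.join(',')}" : "-i ${files[0]}"
    def biom_format = true
    def output = biom_format ? '-B -o motus_merged.biom' : '-o motus_merged.txt'
    // The templated script then renders to: motus merge -i sample1.out,sample2.out -B -o motus_merged.biom
    assert "motus merge ${cmd_input} ${output}" == 'motus merge -i sample1.out,sample2.out -B -o motus_merged.biom'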

View file

@ -0,0 +1,57 @@
name: "motus_merge"
description: Taxonomic meta-omics profiling using universal marker genes
keywords:
- classify
- metagenomics
- fastq
- taxonomic profiling
- merging
- merge
- otu table
tools:
- "motus":
description: "Marker gene-based OTU (mOTU) profiling"
homepage: "https://motu-tool.org/"
documentation: "https://github.com/motu-tool/mOTUs/wiki"
tool_dev_url: "https://github.com/motu-tool/mOTUs"
doi: "10.1038/s41467-019-08844-4"
licence: "['GPL v3']"
input:
- input:
type: file
description: |
List of output files (more than one) from motus profile,
or a single directory containing motus output files.
- db:
type: directory
description: |
mOTUs database downloaded by `motus downloadDB`
pattern: "db_mOTU/"
- profile_version_yml:
type: file
description: |
A single versions.yml file output from motus/profile. motus/merge cannot reconstruct
this itself without having the motus database present and configured with the tool
so here we take it from what is already reported by the upstream module.
pattern: "versions.yml"
- biom_format:
type: boolean
description: Whether to save output OTU table in biom format
output:
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- txt:
type: file
description: OTU table in txt format, if BIOM format not requested
pattern: "*.txt"
- biom:
type: file
description: OTU table in biom format, if BIOM format requested
pattern: "*.biom"
authors:
- "@jfy133"

View file

@@ -48,7 +48,7 @@ process MOTUS_PROFILE {
     fi
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        mOTUs: \$VERSION
+        motus: \$VERSION
     END_VERSIONS
     """
 }

View file

@@ -132,6 +132,10 @@ params {
     // krona
     run_krona = false
     krona_taxonomy_directory = null
+
+    // profile standardisation
+    run_profile_standardisation = false
+    generate_biom_output = false
 }

 // Load base.config by default for all pipelines

View file

@@ -10,7 +10,10 @@
        "type": "object",
        "fa_icon": "fas fa-terminal",
        "description": "Define where the pipeline should find input data and save output data.",
-       "required": ["input", "outdir"],
+       "required": [
+           "input",
+           "outdir"
+       ],
        "properties": {
            "input": {
                "type": "string",
@@ -52,7 +55,8 @@
            "type": "string",
            "description": "Name of iGenomes reference.",
            "fa_icon": "fas fa-book",
-           "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+           "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
+           "hidden": true
        },
        "igenomes_base": {
            "type": "string",
@@ -173,7 +177,14 @@
            "description": "Method used to save pipeline results to output directory.",
            "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
            "fa_icon": "fas fa-copy",
-           "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+           "enum": [
+               "symlink",
+               "rellink",
+               "link",
+               "copy",
+               "copyNoFollow",
+               "move"
+           ],
            "hidden": true
        },
        "email_on_fail": {
@@ -287,7 +298,10 @@
        "shortread_qc_tool": {
            "type": "string",
            "default": "fastp",
-           "enum": ["fastp", "adapterremoval"]
+           "enum": [
+               "fastp",
+               "adapterremoval"
+           ]
        },
        "shortread_qc_skipadaptertrim": {
            "type": "boolean"
@@ -313,7 +327,11 @@
        "shortread_complexityfilter_tool": {
            "type": "string",
            "default": "bbduk",
-           "enum": ["bbduk", "prinseqplusplus", "fastp"]
+           "enum": [
+               "bbduk",
+               "prinseqplusplus",
+               "fastp"
+           ]
        },
        "shortread_complexityfilter_bbduk_windowsize": {
            "type": "integer",
@@ -329,7 +347,10 @@
        "shortread_complexityfilter_prinseqplusplus_mode": {
            "type": "string",
            "default": "entropy",
-           "enum": ["entropy", "dust"]
+           "enum": [
+               "entropy",
+               "dust"
+           ]
        },
        "shortread_complexityfilter_prinseqplusplus_dustscore": {
            "type": "number",
@@ -385,7 +406,14 @@
        "kaiju_taxon_name": {
            "type": "string",
            "default": "species",
-           "enum": ["phylum", "class", "order", "family", "genus", "species"]
+           "enum": [
+               "phylum",
+               "class",
+               "order",
+               "family",
+               "genus",
+               "species"
+           ]
        },
        "run_diamond": {
            "type": "boolean"
@@ -393,7 +421,15 @@
        "diamond_output_format": {
            "type": "string",
            "default": "tsv",
-           "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"]
+           "enum": [
+               "blast",
+               "xml",
+               "txt",
+               "daa",
+               "sam",
+               "tsv",
+               "paf"
+           ]
        },
        "longread_hostremoval_index": {
            "type": "string",
@@ -444,7 +480,13 @@
        },
        "krona_taxonomy_directory": {
            "type": "string",
-           "default": null
+           "default": "None"
        },
+       "run_profile_standardisation": {
+           "type": "boolean"
+       },
+       "generate_biom_output": {
+           "type": "boolean"
+       }
    }
}

View file

@@ -234,5 +234,6 @@ workflow PROFILING {
     classifications = ch_raw_classifications
     profiles        = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
     versions        = ch_versions // channel: [ versions.yml ]
+    motu_version    = MOTUS_PROFILE.out.versions.first()
     mqc             = ch_multiqc_files
 }
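The .first() on the emitted versions channel is what lets a single mOTUs version report be reused downstream: it converts the per-sample queue channel into a value channel that can be read any number of times. A minimal Nextflow sketch, with hypothetical file names:

    // A value channel emits its single item to every downstream consumer.
    ch_versions_queue = Channel.of( 'versions_a.yml', 'versions_b.yml' )
    ch_motus_version  = ch_versions_queue.first() // value channel holding 'versions_a.yml'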

View file

@ -0,0 +1,56 @@
//
// Create Krona visualizations
//
include { MOTUS_MERGE } from '../../modules/nf-core/modules/motus/merge/main'
workflow STANDARDISATION_PROFILES {
take:
classifications
profiles
databases
motu_version
main:
ch_standardised_tables = Channel.empty()
ch_versions = Channel.empty()
/*
Split profile results based on tool they come from
*/
ch_input_profiles = profiles
.branch {
motus: it[0]['tool'] == 'motus'
unknown: true
}
ch_input_classifications = classifications
.branch {
unknown: true
}
ch_input_databases = databases
.branch {
motus: it[0]['tool'] == 'motus'
unknown: true
}
/*
Standardise and aggregate
*/
// mOTUs has a 'single' database, and cannot create custom ones.
// Therefore removing db info here, and publish merged at root mOTUs results
// directory
MOTUS_MERGE ( ch_input_profiles.motus.map{it[1]}.collect(), ch_input_databases.motus.map{it[1]}, motu_version, params.generate_biom_output )
if ( params.generate_biom_output ) {
ch_standardised_tables = ch_standardised_tables.mix ( MOTUS_MERGE.out.biom )
} else {
ch_standardised_tables = ch_standardised_tables.mix ( MOTUS_MERGE.out.txt )
}
ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions )
emit:
tables = ch_standardised_tables
versions = ch_versions
}
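The .branch operator used above splits one channel into named sub-channels by the first matching predicate; anything not caught falls through to unknown. A minimal standalone Nextflow sketch, with hypothetical tuples:

    Channel
        .of( [[tool: 'motus'], 'a.motus.out'], [[tool: 'kraken2'], 'b.kraken2.report'] )
        .branch {
            motus: it[0]['tool'] == 'motus'
            unknown: true
        }
        .set { ch_demo }

    ch_demo.motus.view()   // emits only the mOTUs tuple
    ch_demo.unknown.view() // emits everything else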

View file

@@ -78,7 +78,7 @@ workflow VISUALIZATION_KRONA {
     ch_krona_text_for_import = ch_cleaned_krona_text
         .map{[[id: it[0]['db_name'], tool: it[0]['tool']], it[1]]}
         .groupTuple()
-        .dump(tag: "text")

     KRONA_KTIMPORTTEXT( ch_krona_text_for_import )
     ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html )
     ch_versions = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() )
@@ -92,7 +92,7 @@ workflow VISUALIZATION_KRONA {
     ch_krona_taxonomy_for_input = GUNZIP.out.gunzip
         .map{[[id: it[0]['db_name'], tool: it[0]['tool']], it[1]]}
         .groupTuple()
-        .dump(tag: "taxonomy")

     KRONA_KTIMPORTTAXONOMY ( ch_krona_taxonomy_for_input, file(params.krona_taxonomy_directory, checkExists: true) )
     ch_krona_html.mix( KRONA_KTIMPORTTAXONOMY.out.html )
     ch_versions = ch_versions.mix( MEGAN_RMA2INFO.out.versions.first() )

View file

@@ -65,6 +65,7 @@ include { LONGREAD_HOSTREMOVAL          } from '../subworkflows/local/longread_h
 include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
 include { PROFILING                     } from '../subworkflows/local/profiling'
 include { VISUALIZATION_KRONA           } from '../subworkflows/local/visualization_krona'
+include { STANDARDISATION_PROFILES      } from '../subworkflows/local/standardisation_profiles'

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -220,6 +221,14 @@ workflow TAXPROFILER {
         ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
     }

+    /*
+        SUBWORKFLOW: PROFILING STANDARDISATION
+    */
+    if ( params.run_profile_standardisation ) {
+        STANDARDISATION_PROFILES ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs, PROFILING.out.motu_version )
+        ch_versions = ch_versions.mix( STANDARDISATION_PROFILES.out.versions )
+    }
+
     /*
         MODULE: MultiQC
     */
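Taken together, the new behaviour is opt-in: a run only enters STANDARDISATION_PROFILES when run_profile_standardisation is set, and only produces BIOM output when generate_biom_output is also set. A hypothetical -c config override enabling both alongside mOTUs profiling might look like:

    params {
        run_motus                    = true
        run_profile_standardisation = true
        generate_biom_output        = true
    }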