Start work

2024-11-22 10:39:54 +00:00 · 2022-04-13 11:49:35 +02:00 · 2022-04-13 11:49:35 +02:00 · de57340526
commit de57340526
parent 1ac5a129a8
6 changed files with 146 additions and 10 deletions
--- a/modules.json
+++ b/modules.json
@ -30,6 +30,9 @@
            "malt/run": {
                "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
            },
+            "megan/rma2info": {
+                "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
+            },
            "metaphlan3": {
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
@ -47,4 +50,4 @@
            }
        }
    }
-}
+}
--- a/modules/nf-core/modules/megan/rma2info/main.nf
+++ b/modules/nf-core/modules/megan/rma2info/main.nf
@ -0,0 +1,38 @@
+process MEGAN_RMA2INFO { // Runs `rma2info` on one RMA6 file and writes its export to <prefix>.txt.gz
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::megan=6.21.7" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/megan:6.21.7--h9ee0642_0':
+        'quay.io/biocontainers/megan:6.21.7--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(rma6) // meta map (meta.id feeds the tag and the default prefix) plus the RMA6 file passed to `-i`
+    val(megan_summary) // truthy value adds `-es <prefix>.megan` to the command line
+
+    output:
+    tuple val(meta), path("*.txt.gz")               , emit: txt
+    tuple val(meta), path("*.megan"), optional: true, emit: megan_summary // optional: the command only names a .megan file when megan_summary is truthy
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when // run unless task.ext.when is set to a falsy value
+
+    script:
+    def args = task.ext.args ?: '' // extra command-line arguments, appended verbatim after the fixed options
+    def prefix = task.ext.prefix ?: "${meta.id}" // basename for both output files; defaults to meta.id
+    def summary = megan_summary ? "-es ${prefix}.megan" : "" // empty string leaves the summary option off entirely
+    """
+    rma2info \\
+        -i ${rma6} \\
+        -o ${prefix}.txt.gz \\
+        ${summary} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        megan: \$(echo \$(rma2info 2>&1) | grep version | sed 's/.*version //g;s/, built.*//g')
+    END_VERSIONS
+    """
+}
--- a/modules/nf-core/modules/megan/rma2info/meta.yml
+++ b/modules/nf-core/modules/megan/rma2info/meta.yml
@ -0,0 +1,51 @@
+name: "megan_rma2info"
+description: Analyses an RMA file and exports information in text format
+keywords:
+  - megan
+  - rma6
+  - classification
+  - conversion
+tools:
+  - "megan":
+      description: "A tool for studying the taxonomic content of a set of DNA reads"
+      homepage: "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/"
+      documentation: "https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html"
+      tool_dev_url: "https://github.com/husonlab/megan-ce"
+      doi: "10.1371/journal.pcbi.1004957"
+      licence: "['GPL >=3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - rma6:
+      type: file
+      description: RMA6 file from MEGAN or MALT
+      pattern: "*.rma6"
+  - megan_summary:
+      type: boolean
+      description: Specify whether to generate a MEGAN summary file
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - txt:
+      type: file
+      description: Compressed text file
+      pattern: "*.txt.gz"
+  - megan_summary:
+      type: file
+      description: Optionally generated MEGAN summary file
+      pattern: "*.megan"
+
+authors:
+  - "@jfy133"
--- a/nextflow.config
+++ b/nextflow.config
@ -89,6 +89,7 @@ params {
    centrifuge_save_unaligned  = false
    centrifuge_save_aligned    = false
    centrifuge_sam_format      = false
+
    // metaphlan3
    run_metaphlan3             = false
 }
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@ -14,8 +14,9 @@ workflow PROFILING {
    databases // [ [ meta ], path ]

    main:
-    ch_versions       = Channel.empty()
-    ch_multiqc_files  = Channel.empty()
+    ch_versions             = Channel.empty()
+    ch_multiqc_files        = Channel.empty()
+    ch_raw_profiles    = Channel.empty()

 /*
        COMBINE READS WITH POSSIBLE DATABASES
@ -89,30 +90,33 @@ workflow PROFILING {

    if ( params.run_malt ) {
        MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
-        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([])  )
-        ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
+        ch_multiqc_files        = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([])  )
+        ch_versions             = ch_versions.mix( MALT_RUN.out.versions.first() )
+        ch_raw_profiles    = ch_raw_profiles.mix( MALT_RUN.out.rma6 )
    }

    if ( params.run_kraken2 ) {
        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db  )
-        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])  )
-        ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
+        ch_multiqc_files        = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])  )
+        ch_versions             = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
+        ch_raw_profiles    = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
    }

    if ( params.run_centrifuge ) {
        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format  )
-        ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
+        ch_versions             = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
+        ch_raw_profiles    = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report )
    }

    if ( params.run_metaphlan3 ) {
        METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
        ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
+        ch_raw_profiles    = ch_raw_profiles.mix( METAPHLAN3.out.biom )
    }


    emit:
-    // TODO work out if there is enough standardisation of output to export as one?
-    //output    = ch_filtered_reads    // channel: [ val(meta), [ reads ] ]
+    profiles = ch_raw_profiles    // channel: [ val(meta), profile ] - raw per-tool outputs mixed above (rma6, kraken2 txt, centrifuge report, metaphlan3 biom)
    versions = ch_versions          // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
 }
--- a/subworkflows/local/shortread_postprocessing.nf
+++ b/subworkflows/local/shortread_postprocessing.nf
@ -0,0 +1,39 @@
+//
+// Perform read trimming and merging
+//
+
+
+include { SHORTREAD_FASTP             } from './shortread_fastp'
+include { SHORTREAD_ADAPTERREMOVAL    } from './shortread_adapterremoval'
+include { FASTQC as FASTQC_PROCESSED       } from '../../modules/nf-core/modules/fastqc/main'
+
+workflow SHORTREAD_POSTPROCESSING { // Optionally clip/merge short reads with the configured tool, then run FastQC on the result
+    take:
+    reads //  [ [ meta ], [ reads ] ] - handed to the clip/merge subworkflow selected below
+
+    main:
+    ch_versions       = Channel.empty()
+    ch_multiqc_files  = Channel.empty()
+
+    if ( params.shortread_clipmerge_tool == "fastp" ) {
+        ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
+        ch_versions        =  ch_versions.mix( SHORTREAD_FASTP.out.versions )
+        ch_multiqc_files   =  ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
+    } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
+        ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
+        ch_versions        = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
+        ch_multiqc_files   = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
+    } else {
+        ch_processed_reads = reads // any other tool value: pass the reads through untouched
+    }
+
+    FASTQC_PROCESSED ( ch_processed_reads )
+    ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
+
+    emit:
+    output   = ch_processed_reads   // channel: [ val(meta), [ reads ] ] - NOTE(review): original comment said taxon_table; confirm intended payload
+    versions = ch_versions          // channel: [ versions.yml ]
+    mqc      = ch_multiqc_files
+}
+