Add centrifuge classification

2024-12-22 15:08:17 +00:00 · 2022-03-24 12:51:45 +01:00 · 2022-03-24 12:51:45 +01:00 · 3ff54e620e
commit 3ff54e620e
parent 358b89a4c6
9 changed files with 179 additions and 8 deletions
--- a/conf/modules.config
+++ b/conf/modules.config
@ -121,4 +121,14 @@ process {
        ]
    }

+    // Publishing and per-database arguments for the CENTRIFUGE process.
+    withName: CENTRIFUGE {
+        publishDir = [
+            path: { "${params.outdir}/centrifuge/${meta.db_name}" },
+            mode: 'copy',
+            // Groups cannot be nested in this glob, so the mate-numbered read files
+            // (*.fastq.1.gz / *.fastq.2.gz) declared by the module are listed explicitly.
+            pattern: '*.{fastq.gz,fastq.1.gz,fastq.2.gz,txt}'
+        ]
+        // Extra command-line arguments come from the database's meta map.
+        ext.args = { "${meta.db_params}" }
+        // Output prefix combines sample id and database name.
+        ext.prefix = { "${meta.id}-${meta.db_name}" }
+    }
+
 }
--- a/conf/test.config
+++ b/conf/test.config
@ -29,5 +29,6 @@ params {
    run_kraken2         = true
    run_malt            = true
    shortread_clipmerge = true
+    run_centrifuge      = true

 }
--- a/modules.json
+++ b/modules.json
@ -29,6 +29,9 @@
            "porechop": {
                "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
-            }
+            },
+            "centrifuge": {
+                "git_sha": "ea41a8a6f761b9993d857570e872abaae3fea555"
+            }
        }
    }
-}
+}
--- a/modules/nf-core/modules/centrifuge/main.nf
+++ b/modules/nf-core/modules/centrifuge/main.nf
@ -0,0 +1,63 @@
+// Classifies reads with centrifuge against a database supplied as a tarball, then
+// builds a Kraken-style report from the results with centrifuge-kreport.
+process CENTRIFUGE {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
+        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
+
+    input:
+    tuple val(meta), path(reads) // meta.single_end picks -U vs -1/-2
+    path db                      // tarball, unpacked in the script with `tar -xf`
+    val save_unaligned           // truthy: add --un-gz / --un-conc-gz
+    val save_aligned             // truthy: add --al-gz / --al-conc-gz
+    val sam_format               // truthy: add --out-fmt 'sam'
+
+    output:
+    // The leading dot keeps "${prefix}.kreport.txt" out of the `report` channel;
+    // a bare '*report.txt' glob matches both report files.
+    tuple val(meta), path('*.report.txt')                , emit: report
+    tuple val(meta), path('*results.txt')                , emit: results
+    tuple val(meta), path('*kreport.txt')                , emit: kreport
+    // NOTE(review): nothing in the script names a '*.sam' file; -S always targets
+    // "${prefix}.results.txt". Confirm whether this output can ever be produced.
+    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
+    // The {,.1,.2} alternatives also accept mate-numbered names — presumably what the
+    // --al-conc-gz / --un-conc-gz options write for paired-end data; verify against the tool.
+    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
+    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
+    path "versions.yml"                                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Single-end reads go in via -U, paired-end reads via -1/-2.
+    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
+    // Value passed to -x: the tarball name without its '.tar.gz' suffix.
+    // NOTE(review): assumes the archive unpacks its index under that same name — confirm.
+    def db_name = db.toString().replace(".tar.gz","")
+    def unaligned = ''
+    def aligned = ''
+    // The read-saving flags differ between single-end and paired-end input.
+    if (meta.single_end) {
+        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
+        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
+    } else {
+        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
+        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
+    }
+    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
+    """
+    tar -xf $db
+    centrifuge \\
+        -x $db_name \\
+        -p $task.cpus \\
+        $paired \\
+        --report-file ${prefix}.report.txt \\
+        -S ${prefix}.results.txt \\
+        $unaligned \\
+        $aligned \\
+        $sam_output \\
+        $args
+    centrifuge-kreport -x $db_name ${prefix}.results.txt > ${prefix}.kreport.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        centrifuge: \$( centrifuge --version  | sed -n 1p | sed 's/^.*centrifuge-class version //')
+    END_VERSIONS
+    """
+}
--- a/modules/nf-core/modules/centrifuge/meta.yml
+++ b/modules/nf-core/modules/centrifuge/meta.yml
@ -0,0 +1,73 @@
+name: centrifuge
+description: Classifies metagenomic sequence data
+keywords:
+  - classify
+  - metagenomics
+  - fastq
+  - db
+tools:
+  - centrifuge:
+      description: Centrifuge is a classifier for metagenomic sequences.
+      homepage: https://ccb.jhu.edu/software/centrifuge/
+      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
+      doi: 10.1101/gr.210641.116
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - db:
+      type: file
+      description: Centrifuge database in .tar.gz format
+      pattern: "*.tar.gz"
+  - save_unaligned:
+      type: value
+      description: If true unmapped fastq files are saved
+  - save_aligned:
+      type: value
+      description: If true mapped fastq files are saved
+  - sam_format:
+      type: value
+      description: If true centrifuge is run with --out-fmt 'sam'
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - report:
+      type: file
+      description: |
+        File containing a classification summary
+      pattern: "*.{report.txt}"
+  - results:
+      type: file
+      description: |
+        File containing classification results
+      pattern: "*.{results.txt}"
+  - kreport:
+      type: file
+      description: |
+        File containing kraken-style report from centrifuge
+        out files.
+      pattern: "*.{kreport.txt}"
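+  - sam:
+      type: file
+      description: Optional output file in SAM format
+      pattern: "*.sam"
+  - fastq_unmapped:
+      type: file
+      description: Unmapped fastq files
+      pattern: "*.unmapped.fastq{,.1,.2}.gz"
+  - fastq_mapped:
+      type: file
+      description: Mapped fastq files
+      pattern: "*.mapped.fastq{,.1,.2}.gz"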
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@sofstam"
+  - "@jfy133"
+  - "@sateeshperi"
--- a/nextflow.config
+++ b/nextflow.config
@ -56,7 +56,7 @@ params {

    // FASTQ preprocessing
    shortread_clipmerge           = false
-    shortread_excludeunmerged        = true
+    shortread_excludeunmerged     = true
    longread_clip                 = false

    // MALT
@ -65,6 +65,12 @@ params {

    // kraken2
    run_kraken2                = false
+
+    // centrifuge
+    run_centrifuge             = false
+    save_unaligned             = false
+    save_aligned               = false
+    sam_format                 = false
 }

 // Load base.config by default for all pipelines
--- a/subworkflows/local/db_check.nf
+++ b/subworkflows/local/db_check.nf
@ -21,7 +21,7 @@ workflow DB_CHECK {

    ch_dbs_for_untar = parsed_samplesheet
        .branch {
-            untar: it[1].toString().endsWith(".tar.gz")
+            untar: it[1].toString().endsWith(".tar.gz") && it[0]['tool']!="centrifuge"
            skip: true
        }

--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@ -67,8 +67,9 @@ def create_fastq_channel(LinkedHashMap row) {
            if (!file(row.fastq_2).exists()) {
                exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
            }
-            fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
+         fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
        }
+
    }
    return fastq_meta
 }
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@ -58,7 +58,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/
 include { CAT_FASTQ                   } from '../modules/nf-core/modules/cat/fastq/main'
 include { MALT_RUN                    } from '../modules/nf-core/modules/malt/run/main'
 include { KRAKEN2_KRAKEN2             } from '../modules/nf-core/modules/kraken2/kraken2/main'
-
+include { CENTRIFUGE                  } from '../modules/nf-core/modules/centrifuge/main'

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -149,9 +149,10 @@ workflow TAXPROFILER {
            .combine(DB_CHECK.out.dbs)
            .dump(tag: "reads_plus_db")
            .branch {
-                malt:    it[2]['tool'] == 'malt'
-                kraken2: it[2]['tool'] == 'kraken2'
-                unknown: true
+                malt:       it[2]['tool'] == 'malt'
+                kraken2:    it[2]['tool'] == 'kraken2'
+                centrifuge: it[2]['tool'] == 'centrifuge'
+                unknown:    true
            }

    //
@ -184,6 +185,15 @@ workflow TAXPROFILER {
                                    db: it[3]
                            }

+    // We can run centrifuge one-by-one sample-wise
+    ch_input_for_centrifuge =  ch_input_for_profiling.centrifuge
+                               .dump(tag: "input for centrifuge")
+                               .multiMap {
+                                    it ->
+                                        reads: [ it[0] + it[2], it[1] ]
+                                        db: it[3]
+                                }
+
    //
    // RUN PROFILING
    //
@ -195,6 +205,10 @@ workflow TAXPROFILER {
        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db  )
    }

+    if ( params.run_centrifuge ) {
+        CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.save_unaligned, params.save_aligned, params.sam_format  )
+    }
+
    //
    // MODULE: MultiQC
    //