Add kaiju in taxprofiler

2024-11-22 02:56:04 +00:00 · 2022-04-13 18:51:56 +02:00 · 2022-04-13 18:51:56 +02:00 · 68ce5843a4
commit 68ce5843a4
parent fc8a83245a
6 changed files with 126 additions and 6 deletions
--- a/conf/modules.config
+++ b/conf/modules.config
@ -245,4 +245,13 @@ process {
        ]
    }

+    withName: KAIJU_KAIJU { // per-process settings for the KAIJU_KAIJU module
+        publishDir = [
+            path: { "${params.outdir}/kaiju/${meta.db_name}" }, // one results folder per database name
+            mode: params.publish_dir_mode,
+            pattern: '*.tsv' // only files matching *.tsv are published
+        ]
+        ext.args = { "${meta.db_params}" } // read by the module as task.ext.args and placed on the kaiju command line
+        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } // output file stem: <id>-<run_accession>-<db_name>
+    }
 }
--- a/conf/test.config
+++ b/conf/test.config
@ -22,15 +22,16 @@ params {
    // Input data
    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input                         = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
-    databases                     = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
-    run_kraken2                   = true
-    run_malt                      = true
-    run_metaphlan3                = true
-    run_centrifuge                = true
+    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
+    databases                             = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
+    run_kraken2                           = true
+    run_malt                              = true
+    run_metaphlan3                        = true
+    run_centrifuge                        = true
    perform_shortread_clipmerge           = true
    perform_longread_clip                 = false
    perform_shortread_complexityfilter    = true
    perform_shortread_hostremoval         = true
    shortread_hostremoval_reference       = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+    run_kaiju                             = true
 }
--- a/modules/nf-core/modules/kaiju/kaiju/main.nf
+++ b/modules/nf-core/modules/kaiju/kaiju/main.nf
@ -0,0 +1,41 @@
+process KAIJU_KAIJU { // runs kaiju on one sample's reads, using the *.fmi index and *nodes.dmp file found under `db`
+    tag "$meta.id" // tasks are tagged with the sample id
+    label 'process_high' // resource label; the actual cpu/memory values are defined outside this file
+
+    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null) // conda spec is set only when params.enable_conda is truthy
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
+        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
+
+    input:
+    tuple val(meta), path(reads) // meta supplies id and single_end; reads is one file, or two (indexed [0]/[1]) when not single_end
+    path(db) // searched with `find -L` in the script, so symlinks are followed
+
+    output:
+    tuple val(meta), path('*.tsv'), emit: results // the script writes ${prefix}.tsv
+    path "versions.yml"           , emit: versions // written by the heredoc at the end of the script
+
+    when:
+    task.ext.when == null || task.ext.when // runs unless ext.when is set to a falsy value
+
+    script:
+    def args = task.ext.args ?: '' // extra kaiju flags from config; empty string when unset
+    def prefix = task.ext.prefix ?: "${meta.id}" // output file stem; falls back to the sample id
+    def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}" // -i alone for single-end, -i/-j for the two mates otherwise
+    """
+    dbnodes=`find -L ${db} -name "*nodes.dmp"`
+    dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
+    kaiju \\
+        $args \\
+        -z $task.cpus \\
+        -t \$dbnodes \\
+        -f \$dbname \\
+        -o ${prefix}.tsv \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
+    END_VERSIONS
+    """
+}
--- a/modules/nf-core/modules/kaiju/kaiju/meta.yml
+++ b/modules/nf-core/modules/kaiju/kaiju/meta.yml
@ -0,0 +1,53 @@
+name: kaiju_kaiju
+description: Taxonomic classification of metagenomic sequence data using a protein reference database
+keywords:
+  - classify
+  - metagenomics
+  - fastq
+  - taxonomic profiling
+tools:
+  - kaiju:
+      description: Fast and sensitive taxonomic classification for metagenomics
+      homepage: https://kaiju.binf.ku.dk/
+      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
+      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
+      doi: "10.1038/ncomms11257"
+      licence: ["GNU GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+      pattern: "*.{fastq,fq,fasta,fa,fsa,fas,fna,fastq.gz,fq.gz,fasta.gz,fa.gz,fsa.gz,fas.gz,fna.gz}"
+  - db:
+      type: files
+      description: |
+        List containing the database and nodes files for Kaiju
+        e.g. [ 'database.fmi', 'nodes.dmp' ]
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - results:
+      type: file
+      description: Results with taxonomic classification of each read
+      pattern: "*.tsv"
+
+authors:
+  - "@talnor"
+  - "@sofstam"
+  - "@jfy133"
--- a/nextflow.config
+++ b/nextflow.config
@ -100,6 +100,9 @@ params {

    // metaphlan3
    run_metaphlan3             = false
+
+    // kaiju
+    run_kaiju                  = false
 }

 // Load base.config by default for all pipelines
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@ -6,6 +6,7 @@ include { MALT_RUN                    } from '../../modules/nf-core/modules/malt
 include { KRAKEN2_KRAKEN2             } from '../../modules/nf-core/modules/kraken2/kraken2/main'
 include { CENTRIFUGE_CENTRIFUGE       } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
 include { METAPHLAN3                  } from '../../modules/nf-core/modules/metaphlan3/main'
+include { KAIJU_KAIJU                 } from '../../modules/nf-core/modules/kaiju/kaiju/main'

 workflow PROFILING {
    take:
@ -35,6 +36,7 @@ workflow PROFILING {
                kraken2: it[2]['tool'] == 'kraken2'
                metaphlan3: it[2]['tool'] == 'metaphlan3'
                centrifuge: it[2]['tool'] == 'centrifuge'
+                kaiju: it[2]['tool'] == 'kaiju'
                unknown: true
            }

@ -88,6 +90,13 @@ workflow PROFILING {
                                    db: it[3]
                            }

+    ch_input_for_kaiju = ch_input_for_profiling.kaiju
+                            .multiMap {
+                                it ->
+                                    reads: [it[0] + it[2], it[1]]
+                                    db: it[3]
+                            }
+
    /*
        RUN PROFILING
    */
@ -114,6 +123,10 @@ workflow PROFILING {
        ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
    }

+    if ( params.run_kaiju ) {
+        KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db )
+        ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
+    }

    emit:
    // TODO work out if there is enough standardisation of output to export as one?