Add kaiju_kaiju module (#1448)

* Added kaiju_kaiju module

* Update modules/kaiju/kaiju/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/kaiju/kaiju/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/kaiju/kaiju/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update keywords

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update output file naming

* Update output file naming

* update spacing for lint

* Update input file patterns

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Tanja Normark 2022-03-28 14:28:50 +02:00 committed by GitHub
parent cc671a5f3c
commit 1f483d9203
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 159 additions and 0 deletions

View file

@ -0,0 +1,39 @@
// Runs `kaiju` on one sample's reads against a Kaiju database and writes the
// result to `<prefix>.tsv`.
//
// Inputs:
//   - tuple (meta, reads): `meta` is the sample map (`meta.id` tags the task and
//     is the default output prefix; `meta.single_end` selects the read layout),
//     `reads` is one file (single-end) or a pair of files (paired-end).
//   - tuple (db, dbnodes): the database file passed to `-f` and the taxonomy
//     nodes file passed to `-t`.
// Outputs:
//   - results:  tuple (meta, *.tsv)
//   - versions: versions.yml recording the Kaiju version under the process name.
process KAIJU_KAIJU {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"

    input:
    tuple val(meta), path(reads)
    tuple path(db), path(dbnodes)

    output:
    tuple val(meta), path('*.tsv'), emit: results
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Single-end data goes in via -i only; paired-end mates go in via -i and -j.
    def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}"
    // The tool version line must stay indented below the process-name key so
    // that versions.yml nests the version under "${task.process}".
    """
    kaiju \\
        $args \\
        -z $task.cpus \\
        -t ${dbnodes} \\
        -f ${db} \\
        -o ${prefix}.tsv \\
        $input

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
    END_VERSIONS
    """
}

View file

@ -0,0 +1,52 @@
# Module metadata for kaiju_kaiju: the wrapped tool, its inputs and its outputs.
name: kaiju_kaiju
description: Taxonomic classification of metagenomic sequence data using a protein reference database
keywords:
  - classify
  - metagenomics
  - fastq
  - taxonomic profiling
tools:
  - kaiju:
      description: Fast and sensitive taxonomic classification for metagenomics
      homepage: https://kaiju.binf.ku.dk/
      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
      doi: "10.1038/ncomms11257"
      licence: ["GNU GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
        respectively.
      pattern: "*.{fastq,fq,fasta,fa,fsa,fas,fna,fastq.gz,fq.gz,fasta.gz,fa.gz,fsa.gz,fas.gz,fna.gz}"
  # Database file and taxonomy nodes file, supplied together as one list.
  - db:
      type: files
      description: |
        List containing the database and nodes files for Kaiju
        e.g. [ 'database.fmi', 'nodes.dmp' ]

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - results:
      type: file
      description: Results with taxonomic classification of each read
      pattern: "*.tsv"

authors:
  - "@talnor"
  - "@sofstam"

View file

@ -961,6 +961,10 @@ jupyternotebook:
- modules/jupyternotebook/**
- tests/modules/jupyternotebook/**
# Path globs registered under the kaiju/kaiju tag: the module's own files and
# its test files. Presumably used to pick which module tests to run when these
# paths change; confirm against the CI configuration.
kaiju/kaiju:
- modules/kaiju/kaiju/**
- tests/modules/kaiju/kaiju/**
kallisto/index:
- modules/kallisto/index/**
- tests/modules/kallisto/index/**

View file

@ -28,6 +28,10 @@ params {
kraken2_bracken = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2_bracken"
kraken2_bracken_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2_bracken.tar.gz"
kaiju_fmi = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju/proteins.fmi"
kaiju_nodes = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju/nodes.dmp"
kaiju_names = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju/names.dmp"
ncbi_taxmap_zip = "${test_data_dir}/genomics/sarscov2/genome/db/maltextract/ncbi_taxmap.zip"
taxon_list_txt = "${test_data_dir}/genomics/sarscov2/genome/db/maltextract/taxon_list.txt"

View file

@ -0,0 +1,34 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { KAIJU_KAIJU } from '../../../../modules/kaiju/kaiju/main.nf'
// Single-end test: runs KAIJU_KAIJU on one FASTQ file from the shared test data.
workflow test_kaiju_kaiju_single_end {

    // Sample description and its single read file
    def sample_meta = [ id:'test', single_end:true ]
    def fastq       = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    // Kaiju database and taxon nodes, in the order the process declares them
    def kaiju_database = file(params.test_data['sarscov2']['genome']['kaiju_fmi'], checkIfExists: true)
    def taxon_nodes    = file(params.test_data['sarscov2']['genome']['kaiju_nodes'], checkIfExists: true)

    KAIJU_KAIJU ( [ sample_meta, fastq ], [ kaiju_database, taxon_nodes ] )
}
// Paired-end test: runs KAIJU_KAIJU on both mate files from the shared test data.
workflow test_kaiju_kaiju_paired_end {

    // Sample description and its two mate files (forward first, then reverse)
    def sample_meta = [ id:'test', single_end:false ]
    def fastq_pair  = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]

    // Kaiju database and taxon nodes, in the order the process declares them
    def kaiju_database = file(params.test_data['sarscov2']['genome']['kaiju_fmi'], checkIfExists: true)
    def taxon_nodes    = file(params.test_data['sarscov2']['genome']['kaiju_nodes'], checkIfExists: true)

    KAIJU_KAIJU ( [ sample_meta, fastq_pair ], [ kaiju_database, taxon_nodes ] )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish under <outdir>/<name>, where <name> is derived from the process
    // name: keep the last ':'-separated segment, take its first '_'-separated
    // token, and lower-case it (e.g. KAIJU_KAIJU -> kaiju).
    publishDir = {
        "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}"
    }

}

View file

@ -0,0 +1,21 @@
# Expected outputs for the two kaiju/kaiju test workflows (single- and paired-end).
- name: kaiju kaiju test_kaiju_kaiju_single_end
  command: nextflow run tests/modules/kaiju/kaiju -entry test_kaiju_kaiju_single_end -c tests/config/nextflow.config
  tags:
    - kaiju/kaiju
    - kaiju
  files:
    # The classification table is checked by content rather than checksum.
    - path: output/kaiju/test.tsv
      contains: ["C\tERR5069949.2257580\t2697049"]
    - path: output/kaiju/versions.yml
      md5sum: 7e218c0ea00a71dd3a5ec5aaf28804f4

- name: kaiju kaiju test_kaiju_kaiju_paired_end
  command: nextflow run tests/modules/kaiju/kaiju -entry test_kaiju_kaiju_paired_end -c tests/config/nextflow.config
  tags:
    - kaiju/kaiju
    - kaiju
  files:
    # The classification table is checked by content rather than checksum.
    - path: output/kaiju/test.tsv
      contains: ["C\tERR5069949.2257580\t2697049"]
    - path: output/kaiju/versions.yml
      md5sum: a74215f6f69979ae046fb1d65c56ac67