1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 04:22:04 +00:00

Merge pull request #53 from genomic-medicine-sweden/classification_kaiju

Add kaiju/kaiju in taxprofiler
This commit is contained in:
Sofia Stamouli 2022-04-20 10:57:00 +02:00 committed by GitHub
commit e71e7b86a2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 138 additions and 7 deletions

View file

@ -44,6 +44,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
- [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
- [Kaiju](https://kaiju.binf.ku.dk/)
- [mOTUs](https://motu-tool.org/)
- [MetaMaps](https://github.com/DiltheyLab/MetaMaps)
4. Perform optional post-processing with:
- [bracken](https://ccb.jhu.edu/software/bracken/)
5. Standardises output tables

View file

@ -237,7 +237,7 @@ process {
pattern: '*.txt'
]
ext.args = { "${meta.db_params}" }
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
}
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
@ -256,4 +256,13 @@ process {
]
}
// Per-process configuration applied to the KAIJU_KAIJU process.
withName: KAIJU_KAIJU {
// Publish only files matching '*.tsv' into a per-database sub-directory of
// the pipeline output directory, using the globally configured publish mode.
publishDir = [
path: { "${params.outdir}/kaiju/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.tsv'
]
// Extra command-line arguments are taken from the `db_params` entry of the meta map.
ext.args = { "${meta.db_params}" }
// Output prefix: the run accession is left out of the name when
// params.perform_runmerging is set; otherwise it is included.
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
}
}

View file

@ -22,15 +22,16 @@ params {
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
run_kraken2 = true
run_malt = true
run_metaphlan3 = true
run_centrifuge = true
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_clipmerge = true
perform_longread_clip = false
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_malt = true
run_metaphlan3 = true
run_centrifuge = true
}

View file

@ -124,6 +124,10 @@ Expected (uncompressed) database files for each tool are as follows:
- `mpa_v30_CHOCOPhlAn_201901.rev.1.bt2`
- `mpa_v30_CHOCOPhlAn_201901.rev.2.bt2`
- `mpa_latest`
- **Kaiju** output of `kaiju-makedb`. A directory containing:
- `kaiju_db_*.fmi`
- `nodes.dmp`
- `names.dmp`
## Running the pipeline

View file

@ -53,6 +53,9 @@
},
"untar": {
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
},
"kaiju/kaiju": {
"git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
}
}
}

View file

@ -0,0 +1,41 @@
// Nextflow process that runs `kaiju` on the reads of one sample against a Kaiju database.
//
// Inputs:
//   meta, reads - metadata map and read file(s); `meta.single_end` selects between
//                 a single-file (-i) and a two-file (-i / -j) invocation.
//   db          - path that is searched with `find` for the `*nodes.dmp` and `*.fmi` files.
// Outputs:
//   results  - tuple of meta and the `*.tsv` file(s) produced in the work directory.
//   versions - `versions.yml` recording the reported kaiju version.
process KAIJU_KAIJU {
tag "$meta.id"
label 'process_high'
// Software environment: the Bioconda package when params.enable_conda is set,
// otherwise one of the container images below.
conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
// The Singularity image is chosen only when the container engine is singularity and
// task.ext.singularity_pull_docker_container is not set; in every other case the
// quay.io image is used.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
input:
tuple val(meta), path(reads)
path(db)
output:
tuple val(meta), path('*.tsv'), emit: results
path "versions.yml" , emit: versions
// Run unless task.ext.when is explicitly set to a falsy value.
when:
task.ext.when == null || task.ext.when
script:
// Optional extra arguments and the output prefix come from task.ext;
// the prefix falls back to the sample id.
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// Single-end data passes one file with -i; otherwise the first two entries of
// `reads` are passed with -i and -j.
def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}"
// The shell script below:
//   1. locates the nodes file and the .fmi file under `db`, following symlinks (-L)
//      and ignoring .fmi names that start with "._"
//      (presumably macOS sidecar files - TODO confirm);
//   2. runs kaiju with task.cpus passed via -z and writes `${prefix}.tsv`;
//   3. writes versions.yml from the first line of `kaiju -h`, with everything up to
//      and including "Kaiju " removed.
// NOTE(review): if either `find` matches no file or several files, the resulting
// value is passed to kaiju unchanged - confirm databases hold exactly one of each.
"""
dbnodes=`find -L ${db} -name "*nodes.dmp"`
dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
kaiju \\
$args \\
-z $task.cpus \\
-t \$dbnodes \\
-f \$dbname \\
-o ${prefix}.tsv \\
$input
cat <<-END_VERSIONS > versions.yml
"${task.process}":
kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
END_VERSIONS
"""
}

View file

@ -0,0 +1,53 @@
name: kaiju_kaiju
description: Taxonomic classification of metagenomic sequence data using a protein reference database
keywords:
- classify
- metagenomics
- fastq
- taxonomic profiling
tools:
- kaiju:
description: Fast and sensitive taxonomic classification for metagenomics
homepage: https://kaiju.binf.ku.dk/
documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
tool_dev_url: https://github.com/bioinformatics-centre/kaiju
doi: "10.1038/ncomms11257"
licence: ["GNU GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
respectively.
pattern: "*.{fastq,fq,fasta,fa,fsa,fas,fna,fastq.gz,fq.gz,fasta.gz,fa.gz,fsa.gz,fas.gz,fna.gz}"
- db:
type: files
description: |
List containing the database and nodes files for Kaiju
e.g. [ 'database.fmi', 'nodes.dmp' ]
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- results:
type: file
description: Results with taxonomic classification of each read
pattern: "*.tsv"
authors:
- "@talnor"
- "@sofstam"
- "@jfy133"

View file

@ -101,6 +101,9 @@ params {
// metaphlan3
run_metaphlan3 = false
// kaiju
run_kaiju = false
}
// Load base.config by default for all pipelines

View file

@ -370,6 +370,9 @@
"type": "string",
"default": "None"
},
"run_kaiju": {
"type": "boolean"
},
"malt_generatemegansummary": {
"type": "boolean"
}

View file

@ -7,6 +7,7 @@ include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/mega
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main'
workflow PROFILING {
take:
@ -37,6 +38,7 @@ workflow PROFILING {
kraken2: it[2]['tool'] == 'kraken2'
metaphlan3: it[2]['tool'] == 'metaphlan3'
centrifuge: it[2]['tool'] == 'centrifuge'
kaiju: it[2]['tool'] == 'kaiju'
unknown: true
}
@ -98,6 +100,13 @@ workflow PROFILING {
db: it[3]
}
// Kaiju branch of the profiling input, split into the two channels that
// KAIJU_KAIJU takes (`reads` and `db`).
ch_input_for_kaiju = ch_input_for_profiling.kaiju
.multiMap {
it ->
// reads: it[0] combined with it[2] via `+`, paired with it[1].
// Presumably it[0] is the sample meta map, it[2] the database meta map and
// it[1] the read files - TODO confirm against the channel construction.
reads: [it[0] + it[2], it[1]]
// db: presumably the database path - TODO confirm.
db: it[3]
}
/*
RUN PROFILING
*/
@ -143,6 +152,10 @@ workflow PROFILING {
ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
}
// Run Kaiju only when it is enabled through params.run_kaiju.
if ( params.run_kaiju ) {
KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db )
// Only the first emitted versions.yml is added to the collected versions channel.
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
// NOTE(review): KAIJU_KAIJU.out.results is not mixed into ch_raw_profiles here,
// unlike METAPHLAN3.out.biom in the preceding branch - confirm this is intentional.
}
emit:
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom