NOTE(review): this patch was re-flowed from a whitespace-collapsed copy. Leading
indentation and column alignment of context lines are reconstructed from nf-core
conventions -- verify against the target files (or apply with --ignore-whitespace).
NOTE(review): the final profiling.nf hunk additionally mixes KAIJU_KAIJU.out.results
into ch_raw_profiles, so the recorded blob id on its `index` line is stale.

diff --git a/README.md b/README.md
index f1d59d5..66578e5 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
    - [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
    - [Kaiju](https://kaiju.binf.ku.dk/)
    - [mOTUs](https://motu-tool.org/)
+   - [MetaMaps](https://github.com/DiltheyLab/MetaMaps)
 4. Perform optional post-processing with:
    - [bracken](https://ccb.jhu.edu/software/bracken/)
 5. Standardises output tables
diff --git a/conf/modules.config b/conf/modules.config
index 23455a4..b0e5d61 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -237,7 +237,7 @@ process {
             pattern: '*.txt'
         ]
         ext.args = { "${meta.db_params}" }
-        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
     }
 
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
@@ -256,4 +256,13 @@ process {
         ]
     }
 
+    withName: KAIJU_KAIJU {
+        publishDir = [
+            path: { "${params.outdir}/kaiju/${meta.db_name}" },
+            mode: params.publish_dir_mode,
+            pattern: '*.tsv'
+        ]
+        ext.args = { "${meta.db_params}" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+    }
 }
diff --git a/conf/test.config b/conf/test.config
index 9fa5de8..107beb5 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -22,15 +22,16 @@ params {
     // Input data
     // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input          = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
-    databases      = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
-    run_kraken2    = true
-    run_malt       = true
-    run_metaphlan3 = true
-    run_centrifuge = true
+    input                              = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
+    databases                          = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
     perform_shortread_clipmerge        = true
     perform_longread_clip              = false
     perform_shortread_complexityfilter = true
     perform_shortread_hostremoval      = true
     shortread_hostremoval_reference    = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+    run_kaiju                          = true
+    run_kraken2                        = true
+    run_malt                           = true
+    run_metaphlan3                     = true
+    run_centrifuge                     = true
 }
diff --git a/docs/usage.md b/docs/usage.md
index f4f11fb..5d3268b 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -124,6 +124,10 @@ Expected (uncompressed) database files for each tool are as follows:
   - `mpa_v30_CHOCOPhlAn_201901.rev.1.bt2`
   - `mpa_v30_CHOCOPhlAn_201901.rev.2.bt2`
   - `mpa_latest`
+- **Kaiju** output of `kaiju-makedb`. A directory containing:
+  - `kaiju_db_*.fmi`
+  - `nodes.dmp`
+  - `names.dmp`
 
 ## Running the pipeline
 
diff --git a/modules.json b/modules.json
index 18dea60..d6be2da 100644
--- a/modules.json
+++ b/modules.json
@@ -53,6 +53,9 @@
             },
             "untar": {
                 "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
+            },
+            "kaiju/kaiju": {
+                "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
             }
         }
     }
diff --git a/modules/nf-core/modules/kaiju/kaiju/main.nf b/modules/nf-core/modules/kaiju/kaiju/main.nf
new file mode 100644
index 0000000..ae8f99e
--- /dev/null
+++ b/modules/nf-core/modules/kaiju/kaiju/main.nf
@@ -0,0 +1,41 @@
+process KAIJU_KAIJU {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
+        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
+
+    input:
+    tuple val(meta), path(reads)
+    path(db)
+
+    output:
+    tuple val(meta), path('*.tsv'), emit: results
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}"
+    """
+    dbnodes=`find -L ${db} -name "*nodes.dmp"`
+    dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
+    kaiju \\
+        $args \\
+        -z $task.cpus \\
+        -t \$dbnodes \\
+        -f \$dbname \\
+        -o ${prefix}.tsv \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/kaiju/kaiju/meta.yml b/modules/nf-core/modules/kaiju/kaiju/meta.yml
new file mode 100644
index 0000000..e24c8ef
--- /dev/null
+++ b/modules/nf-core/modules/kaiju/kaiju/meta.yml
@@ -0,0 +1,53 @@
+name: kaiju_kaiju
+description: Taxonomic classification of metagenomic sequence data using a protein reference database
+keywords:
+  - classify
+  - metagenomics
+  - fastq
+  - taxonomic profiling
+tools:
+  - kaiju:
+      description: Fast and sensitive taxonomic classification for metagenomics
+      homepage: https://kaiju.binf.ku.dk/
+      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
+      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
+      doi: "10.1038/ncomms11257"
+      licence: ["GNU GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+      pattern: "*.{fastq,fq,fasta,fa,fsa,fas,fna,fastq.gz,fq.gz,fasta.gz,fa.gz,fsa.gz,fas.gz,fna.gz}"
+  - db:
+      type: files
+      description: |
+        List containing the database and nodes files for Kaiju
+        e.g. [ 'database.fmi', 'nodes.dmp' ]
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - results:
+      type: file
+      description: Results with taxonomic classification of each read
+      pattern: "*.tsv"
+
+authors:
+  - "@talnor"
+  - "@sofstam"
+  - "@jfy133"
diff --git a/nextflow.config b/nextflow.config
index a618f66..fce21ad 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -101,6 +101,9 @@ params {
 
     // metaphlan3
     run_metaphlan3 = false
+
+    // kaiju
+    run_kaiju = false
 }
 
 // Load base.config by default for all pipelines
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2cbfb0e..3d3ad96 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -370,6 +370,9 @@
                     "type": "string",
                     "default": "None"
                 },
+                "run_kaiju": {
+                    "type": "boolean"
+                },
                 "malt_generatemegansummary": {
                     "type": "boolean"
                 }
diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf
index ff20e86..8d19bfe 100644
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@@ -7,6 +7,7 @@ include { MEGAN_RMA2INFO              } from '../../modules/nf-core/modules/mega
 include { KRAKEN2_KRAKEN2             } from '../../modules/nf-core/modules/kraken2/kraken2/main'
 include { CENTRIFUGE_CENTRIFUGE       } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
 include { METAPHLAN3                  } from '../../modules/nf-core/modules/metaphlan3/main'
+include { KAIJU_KAIJU                 } from '../../modules/nf-core/modules/kaiju/kaiju/main'
 
 workflow PROFILING {
     take:
@@ -37,6 +38,7 @@ workflow PROFILING {
                 kraken2: it[2]['tool'] == 'kraken2'
                 metaphlan3: it[2]['tool'] == 'metaphlan3'
                 centrifuge: it[2]['tool'] == 'centrifuge'
+                kaiju: it[2]['tool'] == 'kaiju'
                 unknown: true
             }
 
@@ -98,6 +100,13 @@ workflow PROFILING {
                                     db: it[3]
                             }
 
+    ch_input_for_kaiju = ch_input_for_profiling.kaiju
+                            .multiMap {
+                                it ->
+                                    reads: [it[0] + it[2], it[1]]
+                                    db: it[3]
+                            }
+
     /*
         RUN PROFILING
     */
@@ -143,6 +152,12 @@ workflow PROFILING {
         ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
     }
 
+    if ( params.run_kaiju ) {
+        KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db )
+        ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
+        // Expose Kaiju's classification output on the shared profiles channel, like the other profilers
+        ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU.out.results )
+    }
     emit:
     profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom