diff --git a/CITATIONS.md b/CITATIONS.md index 02621d9..fd8c52a 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -52,6 +52,10 @@ > Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116. +- [DIAMOND](https://doi.org/10.1038/nmeth.3176) + +> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176. + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/conf/modules.config b/conf/modules.config index a72561d..d8fb382 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -264,6 +264,36 @@ process { ] } + withName: KAIJU_KAIJU { + publishDir = [ + path: { "${params.outdir}/kaiju/${meta.db_name}" }, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] + ext.args = { "${meta.db_params}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + } + + withName: KAIJU_KAIJU2TABLE { + ext.args = { "${meta.db_params}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + publishDir = [ + path: { "${params.outdir}/kaiju/${meta.db_name}" }, + mode: params.publish_dir_mode, + pattern: '*.{txt}' + ] + } + + withName: DIAMOND_BLASTX { + ext.args = { "${meta.db_params}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + publishDir = [ + path: { "${params.outdir}/diamond/${meta.db_name}" }, + mode: params.publish_dir_mode, + pattern: '*.{blast,xml,txt,daa,sam,tsv,paf}' + ] + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -279,24 +309,4 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - withName: KAIJU_KAIJU { - publishDir = [ - path: { "${params.outdir}/kaiju/${meta.db_name}" }, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ] - ext.args = { "${meta.db_params}" } - ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } - } - - withName: KAIJU_KAIJU2TABLE { - ext.args = { "${meta.db_params}" } - ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } - publishDir = [ - path: { "${params.outdir}/kaiju/${meta.db_name}" }, - mode: params.publish_dir_mode, - pattern: '*.{txt}' - ] - } } diff --git a/conf/test.config b/conf/test.config index ecf55bd..a2464b2 100644 --- a/conf/test.config +++ b/conf/test.config @@ -34,6 +34,7 @@ params { run_malt = true run_metaphlan3 = true run_centrifuge = true + run_diamond = true } process { diff --git a/docs/usage.md b/docs/usage.md index 5d3268b..cee2bb6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -128,6 +128,9 @@ Expected (uncompressed) database files for each tool are as follows: - `kaiju_db_*.fmi` - `nodes.dmp` - `names.dmp` +- **DIAMOND** output of `diamond makedb`. Note: requires building with taxonomy files + to generate taxonomic profile. See [DIAMOND documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options). A file named: + - `.dmnd` ## Running the pipeline diff --git a/modules.json b/modules.json index ffcde5d..a65926c 100644 --- a/modules.json +++ b/modules.json @@ -27,6 +27,9 @@ "custom/dumpsoftwareversions": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "diamond/blastx": { + "git_sha": "42564565b934eeb2449e35ec97ed13ff2a67f1de" + }, "fastp": { "git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789" }, diff --git a/modules/nf-core/modules/diamond/blastx/main.nf b/modules/nf-core/modules/diamond/blastx/main.nf new file mode 100644 index 0000000..6703c1e --- /dev/null +++ b/modules/nf-core/modules/diamond/blastx/main.nf @@ -0,0 +1,53 @@ +process DIAMOND_BLASTX { + tag "$meta.id" + label 'process_medium' + + // Dimaond is limited to v2.0.9 because there is not a + // singularity version higher than this at the current time. + conda (params.enable_conda ? "bioconda::diamond=2.0.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' : + 'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }" + + input: + tuple val(meta), path(fasta) + path db + val outext + + output: + tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + switch ( outext ) { + case "blast": outfmt = 0; break + case "xml": outfmt = 5; break + case "txt": outfmt = 6; break + case "daa": outfmt = 100; break + case "sam": outfmt = 101; break + case "tsv": outfmt = 102; break + case "paf": outfmt = 103; break + } + """ + DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'` + + diamond \\ + blastx \\ + --threads $task.cpus \\ + --db \$DB \\ + --query $fasta \\ + --outfmt ${outfmt} \\ + $args \\ + --out ${prefix}.${outext} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + diamond: \$(diamond --version 2>&1 | tail -n 1 | sed 's/^diamond version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/diamond/blastx/meta.yml b/modules/nf-core/modules/diamond/blastx/meta.yml new file mode 100644 index 0000000..5ee2d55 --- /dev/null +++ b/modules/nf-core/modules/diamond/blastx/meta.yml @@ -0,0 +1,52 @@ +name: diamond_blastx +description: Queries a DIAMOND database using blastx mode +keywords: + - fasta + - diamond + - blastx + - DNA sequence +tools: + - diamond: + description: Accelerated BLAST compatible local sequence aligner + homepage: https://github.com/bbuchfink/diamond + documentation: https://github.com/bbuchfink/diamond/wiki + tool_dev_url: https://github.com/bbuchfink/diamond + doi: "doi:10.1038/s41592-021-01101-x" + licence: ["GPL v3.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing query sequences + pattern: "*.{fa,fasta}" + - db: + type: directory + description: Directory containing the nucelotide blast database + pattern: "*" + - outext: + type: string + description: | + Specify the type of output file to be generated. `blast` corresponds to + BLAST pairwise format. `xml` corresponds to BLAST xml format. + `txt` corresponds to to BLAST tabular format. `tsv` corresponds to + taxonomic classification format. + pattern: "blast|xml|txt|daa|sam|tsv|paf" + +output: + - txt: + type: file + description: File containing blastx hits + pattern: "*.{blastx.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@spficklin" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 909da25..5644786 100644 --- a/nextflow.config +++ b/nextflow.config @@ -108,6 +108,10 @@ params { // kaiju run_kaiju = false kaiju_taxon_name = 'species' + + // diamond + run_diamond = false + diamond_output_format = 'txt' } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 83793e8..f429d1b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -389,6 +389,14 @@ "type": "string", "default": "species", "enum": ["phylum", "class", "order", "family", "genus", "species"] + }, + "run_diamond": { + "type": "boolean" + }, + "diamond_output_format": { + "type": "string", + "default": "tsv", + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"] } } } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 1f1d4da..9389e19 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -10,6 +10,8 @@ include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/modules/cent include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main' include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main' include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main' +include { DIAMOND_BLASTX } from '../../modules/nf-core/modules/diamond/blastx/main' + workflow PROFILING { take: @@ -41,6 +43,7 @@ workflow PROFILING { metaphlan3: it[2]['tool'] == 'metaphlan3' centrifuge: it[2]['tool'] == 'centrifuge' kaiju: it[2]['tool'] == 'kaiju' + diamond: it[2]['tool'] == 'diamond' unknown: true } @@ -109,6 +112,13 @@ workflow PROFILING { db: it[3] } + ch_input_for_diamond = ch_input_for_profiling.diamond + .multiMap { + it -> + reads: [it[0] + it[2], it[1]] + db: it[3] + } + /* RUN PROFILING */ @@ -163,6 +173,12 @@ workflow PROFILING { ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary ) } + if ( params.run_diamond ) { + DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format ) + ch_versions = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.output ) + } + emit: profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom versions = ch_versions // channel: [ versions.yml ]