Update diamond/blastx diamond/blastp to support all possible output formats

This commit is contained in:
James Fellows Yates 2022-04-29 10:26:17 +02:00
parent 4e73a9e9c5
commit 42564565b9
8 changed files with 108 additions and 15 deletions

View file

@ -11,10 +11,11 @@ process DIAMOND_BLASTP {
input: input:
tuple val(meta), path(fasta) tuple val(meta), path(fasta)
path db path db
val outext
output: output:
tuple val(meta), path('*.txt'), emit: txt tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
@ -23,6 +24,15 @@ process DIAMOND_BLASTP {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
switch ( outext ) {
case "blast": outfmt = 0; break
case "xml": outfmt = 5; break
case "txt": outfmt = 6; break
case "daa": outfmt = 100; break
case "sam": outfmt = 101; break
case "tsv": outfmt = 102; break
case "paf": outfmt = 103; break
}
""" """
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'` DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -31,8 +41,9 @@ process DIAMOND_BLASTP {
--threads $task.cpus \\ --threads $task.cpus \\
--db \$DB \\ --db \$DB \\
--query $fasta \\ --query $fasta \\
--outfmt ${outfmt} \\
$args \\ $args \\
--out ${prefix}.txt --out ${prefix}.${outext}
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":

View file

@ -28,6 +28,14 @@ input:
type: directory type: directory
description: Directory containing the protein blast database description: Directory containing the protein blast database
pattern: "*" pattern: "*"
- outext:
type: string
description: |
Specify the type of output file to be generated. `blast` corresponds to
BLAST pairwise format. `xml` corresponds to BLAST xml format.
`txt` corresponds to BLAST tabular format. `tsv` corresponds to
taxonomic classification format.
pattern: "blast|xml|txt|daa|sam|tsv|paf"
output: output:
- txt: - txt:
@ -41,3 +49,4 @@ output:
authors: authors:
- "@spficklin" - "@spficklin"
- "@jfy133"

View file

@ -11,10 +11,11 @@ process DIAMOND_BLASTX {
input: input:
tuple val(meta), path(fasta) tuple val(meta), path(fasta)
path db path db
val outext
output: output:
tuple val(meta), path('*.txt'), emit: txt tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
@ -23,6 +24,15 @@ process DIAMOND_BLASTX {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
switch ( outext ) {
case "blast": outfmt = 0; break
case "xml": outfmt = 5; break
case "txt": outfmt = 6; break
case "daa": outfmt = 100; break
case "sam": outfmt = 101; break
case "tsv": outfmt = 102; break
case "paf": outfmt = 103; break
}
""" """
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'` DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -31,8 +41,9 @@ process DIAMOND_BLASTX {
--threads $task.cpus \\ --threads $task.cpus \\
--db \$DB \\ --db \$DB \\
--query $fasta \\ --query $fasta \\
--outfmt ${outfmt} \\
$args \\ $args \\
--out ${prefix}.txt --out ${prefix}.${outext}
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":

View file

@ -28,6 +28,14 @@ input:
type: directory type: directory
description: Directory containing the nucleotide blast database description: Directory containing the nucleotide blast database
pattern: "*" pattern: "*"
- outext:
type: string
description: |
Specify the type of output file to be generated. `blast` corresponds to
BLAST pairwise format. `xml` corresponds to BLAST xml format.
`txt` corresponds to BLAST tabular format. `tsv` corresponds to
taxonomic classification format.
pattern: "blast|xml|txt|daa|sam|tsv|paf"
output: output:
- txt: - txt:
@ -41,3 +49,4 @@ output:
authors: authors:
- "@spficklin" - "@spficklin"
- "@jfy133"

View file

@ -9,7 +9,18 @@ workflow test_diamond_blastp {
db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'txt'
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db ) DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
}
workflow test_diamond_blastp_daa {
db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'daa'
DIAMOND_MAKEDB ( db )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
} }

View file

@ -1,8 +1,23 @@
- name: diamond blastp - name: diamond blastp test_diamond_blastp
command: nextflow run ./tests/modules/diamond/blastp -entry test_diamond_blastp -c ./tests/config/nextflow.config -c ./tests/modules/diamond/blastp/nextflow.config command: nextflow run tests/modules/diamond/blastp -entry test_diamond_blastp -c tests/config/nextflow.config
tags: tags:
- diamond - diamond
- diamond/blastp - diamond/blastp
files: files:
- path: ./output/diamond/test.diamond_blastp.txt - path: output/diamond/genome.fasta.dmnd
md5sum: 2447fb376394c20d43ea3aad2aa5d15d
- path: output/diamond/test.diamond_blastp.txt
md5sum: 3ca7f6290c1d8741c573370e6f8b4db0 md5sum: 3ca7f6290c1d8741c573370e6f8b4db0
- path: output/diamond/versions.yml
- name: diamond blastp test_diamond_blastp_daa
command: nextflow run tests/modules/diamond/blastp -entry test_diamond_blastp_daa -c tests/config/nextflow.config
tags:
- diamond
- diamond/blastp
files:
- path: output/diamond/genome.fasta.dmnd
md5sum: 2447fb376394c20d43ea3aad2aa5d15d
- path: output/diamond/test.diamond_blastp.daa
md5sum: d4a79ad1fcb2ec69460e5a09a9468db7
- path: output/diamond/versions.yml

View file

@ -9,7 +9,18 @@ workflow test_diamond_blastx {
db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'txt'
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db ) DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
}
workflow test_diamond_blastx_daa {
db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'daa'
DIAMOND_MAKEDB ( db )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
} }

View file

@ -1,8 +1,24 @@
- name: diamond blastx - name: diamond blastx test_diamond_blastx
command: nextflow run ./tests/modules/diamond/blastx -entry test_diamond_blastx -c ./tests/config/nextflow.config -c ./tests/modules/diamond/blastx/nextflow.config command: nextflow run tests/modules/diamond/blastx -entry test_diamond_blastx -c tests/config/nextflow.config
tags: tags:
- diamond - diamond
- diamond/blastx - diamond/blastx
files: files:
- path: ./output/diamond/test.diamond_blastx.txt - path: output/diamond/genome.fasta.dmnd
md5sum: d41d8cd98f00b204e9800998ecf8427e md5sum: 2447fb376394c20d43ea3aad2aa5d15d
- path: output/diamond/test.diamond_blastx.txt
- path: output/diamond/versions.yml
md5sum: 747934f57b7c0f8901570500f206eac6
- name: diamond blastx test_diamond_blastx_daa
command: nextflow run tests/modules/diamond/blastx -entry test_diamond_blastx_daa -c tests/config/nextflow.config
tags:
- diamond
- diamond/blastx
files:
- path: output/diamond/genome.fasta.dmnd
md5sum: 2447fb376394c20d43ea3aad2aa5d15d
- path: output/diamond/test.diamond_blastx.daa
md5sum: 2a0ce0f7e01dcead828b87d5cbaccf7a
- path: output/diamond/versions.yml
md5sum: 05cbabfd500fc17e26b3d8061c5a78c3