Merge pull request #1595 from jfy133/diamond-update

Standardise DIAMOND output channels and md5sum
This commit is contained in:
James A. Fellows Yates 2022-05-02 13:43:56 +02:00 committed by GitHub
commit 92efb83ec8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 110 additions and 29 deletions

View file

@ -10,11 +10,18 @@ process DIAMOND_BLASTP {
input: input:
tuple val(meta), path(fasta) tuple val(meta), path(fasta)
path db path db
val outext val out_ext
val blast_columns
output: output:
tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output tuple val(meta), path('*.blast'), optional: true, emit: blast
path "versions.yml" , emit: versions tuple val(meta), path('*.xml') , optional: true, emit: xml
tuple val(meta), path('*.txt') , optional: true, emit: txt
tuple val(meta), path('*.daa') , optional: true, emit: daa
tuple val(meta), path('*.sam') , optional: true, emit: sam
tuple val(meta), path('*.tsv') , optional: true, emit: tsv
tuple val(meta), path('*.paf') , optional: true, emit: paf
path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
@ -22,7 +29,8 @@ process DIAMOND_BLASTP {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
switch ( outext ) { def columns = blast_columns ? "${blast_columns}" : ''
switch ( out_ext ) {
case "blast": outfmt = 0; break case "blast": outfmt = 0; break
case "xml": outfmt = 5; break case "xml": outfmt = 5; break
case "txt": outfmt = 6; break case "txt": outfmt = 6; break
@ -30,6 +38,11 @@ process DIAMOND_BLASTP {
case "sam": outfmt = 101; break case "sam": outfmt = 101; break
case "tsv": outfmt = 102; break case "tsv": outfmt = 102; break
case "paf": outfmt = 103; break case "paf": outfmt = 103; break
default:
outfmt = '6';
out_ext = 'txt';
log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
break
} }
""" """
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'` DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -39,9 +52,9 @@ process DIAMOND_BLASTP {
--threads $task.cpus \\ --threads $task.cpus \\
--db \$DB \\ --db \$DB \\
--query $fasta \\ --query $fasta \\
--outfmt ${outfmt} \\ --outfmt ${outfmt} ${columns} \\
$args \\ $args \\
--out ${prefix}.${outext} --out ${prefix}.${out_ext}
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":

View file

@ -28,7 +28,7 @@ input:
type: directory type: directory
description: Directory containing the protein blast database description: Directory containing the protein blast database
pattern: "*" pattern: "*"
- outext: - out_ext:
type: string type: string
description: | description: |
Specify the type of output file to be generated. `blast` corresponds to Specify the type of output file to be generated. `blast` corresponds to
@ -36,12 +36,42 @@ input:
`txt` corresponds to BLAST tabular format. `tsv` corresponds to `txt` corresponds to BLAST tabular format. `tsv` corresponds to
taxonomic classification format. taxonomic classification format.
pattern: "blast|xml|txt|daa|sam|tsv|paf" pattern: "blast|xml|txt|daa|sam|tsv|paf"
- blast_columns:
type: string
description: |
Optional space separated list of DIAMOND tabular BLAST output keywords
used in conjunction with the 'txt' out_ext option (--outfmt 6). See
DIAMOND documentation for more information.
output: output:
- txt: - blast:
type: file type: file
description: File containing blastp hits description: File containing blastp hits
pattern: "*.{blastp.txt}" pattern: "*.{blast}"
- xml:
type: file
description: File containing blastp hits
pattern: "*.{xml}"
- txt:
type: file
description: File containing hits in tabular BLAST format.
pattern: "*.{txt}"
- daa:
type: file
description: File containing hits in DAA format
pattern: "*.{daa}"
- sam:
type: file
description: File containing aligned reads in SAM format
pattern: "*.{sam}"
- tsv:
type: file
description: Tab separated file containing taxonomic classification of hits
pattern: "*.{tsv}"
- paf:
type: file
description: File containing aligned reads in pairwise mapping format
pattern: "*.{paf}"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions

View file

@ -10,11 +10,18 @@ process DIAMOND_BLASTX {
input: input:
tuple val(meta), path(fasta) tuple val(meta), path(fasta)
path db path db
val outext val out_ext
val blast_columns
output: output:
tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output tuple val(meta), path('*.blast'), optional: true, emit: blast
path "versions.yml" , emit: versions tuple val(meta), path('*.xml') , optional: true, emit: xml
tuple val(meta), path('*.txt') , optional: true, emit: txt
tuple val(meta), path('*.daa') , optional: true, emit: daa
tuple val(meta), path('*.sam') , optional: true, emit: sam
tuple val(meta), path('*.tsv') , optional: true, emit: tsv
tuple val(meta), path('*.paf') , optional: true, emit: paf
path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
@ -22,7 +29,8 @@ process DIAMOND_BLASTX {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
switch ( outext ) { def columns = blast_columns ? "${blast_columns}" : ''
switch ( out_ext ) {
case "blast": outfmt = 0; break case "blast": outfmt = 0; break
case "xml": outfmt = 5; break case "xml": outfmt = 5; break
case "txt": outfmt = 6; break case "txt": outfmt = 6; break
@ -30,6 +38,11 @@ process DIAMOND_BLASTX {
case "sam": outfmt = 101; break case "sam": outfmt = 101; break
case "tsv": outfmt = 102; break case "tsv": outfmt = 102; break
case "paf": outfmt = 103; break case "paf": outfmt = 103; break
default:
outfmt = '6';
out_ext = 'txt';
log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
break
} }
""" """
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'` DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -39,9 +52,9 @@ process DIAMOND_BLASTX {
--threads $task.cpus \\ --threads $task.cpus \\
--db \$DB \\ --db \$DB \\
--query $fasta \\ --query $fasta \\
--outfmt ${outfmt} \\ --outfmt ${outfmt} ${columns} \\
$args \\ $args \\
--out ${prefix}.${outext} --out ${prefix}.${out_ext}
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":

View file

@ -28,7 +28,7 @@ input:
type: directory type: directory
description: Directory containing the nucleotide blast database description: Directory containing the nucleotide blast database
pattern: "*" pattern: "*"
- outext: - out_ext:
type: string type: string
description: | description: |
Specify the type of output file to be generated. `blast` corresponds to Specify the type of output file to be generated. `blast` corresponds to
@ -38,10 +38,34 @@ input:
pattern: "blast|xml|txt|daa|sam|tsv|paf" pattern: "blast|xml|txt|daa|sam|tsv|paf"
output: output:
- blast:
type: file
description: File containing blastx hits
pattern: "*.{blast}"
- xml:
type: file
description: File containing blastx hits
pattern: "*.{xml}"
- txt: - txt:
type: file type: file
description: File containing blastx hits description: File containing hits in tabular BLAST format.
pattern: "*.{blastx.txt}" pattern: "*.{txt}"
- daa:
type: file
description: File containing hits in DAA format
pattern: "*.{daa}"
- sam:
type: file
description: File containing aligned reads in SAM format
pattern: "*.{sam}"
- tsv:
type: file
description: Tab separated file containing taxonomic classification of hits
pattern: "*.{tsv}"
- paf:
type: file
description: File containing aligned reads in pairwise mapping format
pattern: "*.{paf}"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions

View file

@ -9,18 +9,20 @@ workflow test_diamond_blastp {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
outext = 'txt' out_ext = 'txt'
blast_columns = 'qseqid qlen'
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext ) DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
} }
workflow test_diamond_blastp_daa { workflow test_diamond_blastp_daa {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
outext = 'daa' out_ext = 'daa'
blast_columns = []
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext ) DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
} }

View file

@ -5,7 +5,6 @@
- diamond - diamond
files: files:
- path: output/diamond/test.diamond_blastp.txt - path: output/diamond/test.diamond_blastp.txt
md5sum: 2515cf88590afa32356497e79a51fce9
- path: output/diamond/versions.yml - path: output/diamond/versions.yml
- name: diamond blastp test_diamond_blastp_daa - name: diamond blastp test_diamond_blastp_daa
@ -15,5 +14,4 @@
- diamond - diamond
files: files:
- path: output/diamond/test.diamond_blastp.daa - path: output/diamond/test.diamond_blastp.daa
md5sum: 0b539c68a5b66dd6e20ad5d218f4f4c6
- path: output/diamond/versions.yml - path: output/diamond/versions.yml

View file

@ -9,18 +9,20 @@ workflow test_diamond_blastx {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'txt' out_ext = 'tfdfdt' // Nonsense file extension to check default case.
blast_columns = 'qseqid qlen'
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext ) DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
} }
workflow test_diamond_blastx_daa { workflow test_diamond_blastx_daa {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'daa' out_ext = 'daa'
blast_columns = []
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext ) DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
} }

View file

@ -5,7 +5,6 @@
- diamond/blastx - diamond/blastx
files: files:
- path: output/diamond/test.diamond_blastx.txt - path: output/diamond/test.diamond_blastx.txt
md5sum: eb2aebfa1cb42fcb2121c65528663307
- path: output/diamond/versions.yml - path: output/diamond/versions.yml
- name: diamond blastx test_diamond_blastx_daa - name: diamond blastx test_diamond_blastx_daa