Fix output channels allow BLAST table customisation

This commit is contained in:
James Fellows Yates 2022-05-02 11:39:07 +02:00
parent 996385fb0f
commit 6986975bc0
6 changed files with 88 additions and 14 deletions

View file

@ -11,10 +11,17 @@ process DIAMOND_BLASTP {
tuple val(meta), path(fasta) tuple val(meta), path(fasta)
path db path db
val outext val outext
val blast_columns
output: output:
tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output tuple val(meta), path('*.{blast}'), optional: true, emit: blast
path "versions.yml" , emit: versions tuple val(meta), path('*.{xml}') , optional: true, emit: xml
tuple val(meta), path('*.{txt}') , optional: true, emit: txt
tuple val(meta), path('*.{daa}') , optional: true, emit: daa
tuple val(meta), path('*.{sam}') , optional: true, emit: sam
tuple val(meta), path('*.{tsv}') , optional: true, emit: tsv
tuple val(meta), path('*.{paf}') , optional: true, emit: paf
path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
@ -22,6 +29,7 @@ process DIAMOND_BLASTP {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def columns = blast_columns ? "${blast_columns}" : ''
switch ( outext ) { switch ( outext ) {
case "blast": outfmt = 0; break case "blast": outfmt = 0; break
case "xml": outfmt = 5; break case "xml": outfmt = 5; break
@ -39,7 +47,7 @@ process DIAMOND_BLASTP {
--threads $task.cpus \\ --threads $task.cpus \\
--db \$DB \\ --db \$DB \\
--query $fasta \\ --query $fasta \\
--outfmt ${outfmt} \\ --outfmt ${outfmt} ${columns} \\
$args \\ $args \\
--out ${prefix}.${outext} --out ${prefix}.${outext}

View file

@ -36,12 +36,42 @@ input:
`txt` corresponds to BLAST tabular format. `tsv` corresponds to `txt` corresponds to BLAST tabular format. `tsv` corresponds to
taxonomic classification format. taxonomic classification format.
pattern: "blast|xml|txt|daa|sam|tsv|paf" pattern: "blast|xml|txt|daa|sam|tsv|paf"
- blast_columns:
type: string
description: |
Optional space separated list of DIAMOND tabular BLAST output keywords
used in conjunction with the 'txt' outext option (--outfmt 6). See
DIAMOND documentation for more information.
output: output:
- txt: - blast:
type: file type: file
description: File containing blastp hits description: File containing blastp hits
pattern: "*.{blastp.txt}" pattern: "*.{blast}"
- xml:
type: file
description: File containing blastp hits
pattern: "*.{xml}"
- txt:
type: file
description: File containing hits in tabular BLAST format.
pattern: "*.{txt}"
- daa:
type: file
description: File containing hits in DAA format
pattern: "*.{daa}"
- sam:
type: file
description: File containing aligned reads in SAM format
pattern: "*.{sam}"
- tsv:
type: file
description: Tab separated file containing taxonomic classification of hits
pattern: "*.{tsv}"
- paf:
type: file
description: File containing aligned reads in pairwise mapping format
pattern: "*.{paf}"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions

View file

@ -11,10 +11,17 @@ process DIAMOND_BLASTX {
tuple val(meta), path(fasta) tuple val(meta), path(fasta)
path db path db
val outext val outext
val blast_columns
output: output:
tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output tuple val(meta), path('*.{blast}'), optional: true, emit: blast
path "versions.yml" , emit: versions tuple val(meta), path('*.{xml}') , optional: true, emit: xml
tuple val(meta), path('*.{txt}') , optional: true, emit: txt
tuple val(meta), path('*.{daa}') , optional: true, emit: daa
tuple val(meta), path('*.{sam}') , optional: true, emit: sam
tuple val(meta), path('*.{tsv}') , optional: true, emit: tsv
tuple val(meta), path('*.{paf}') , optional: true, emit: paf
path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
@ -22,6 +29,7 @@ process DIAMOND_BLASTX {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def columns = blast_columns ? "${blast_columns}" : ''
switch ( outext ) { switch ( outext ) {
case "blast": outfmt = 0; break case "blast": outfmt = 0; break
case "xml": outfmt = 5; break case "xml": outfmt = 5; break
@ -39,7 +47,7 @@ process DIAMOND_BLASTX {
--threads $task.cpus \\ --threads $task.cpus \\
--db \$DB \\ --db \$DB \\
--query $fasta \\ --query $fasta \\
--outfmt ${outfmt} \\ --outfmt ${outfmt} ${columns} \\
$args \\ $args \\
--out ${prefix}.${outext} --out ${prefix}.${outext}

View file

@ -38,10 +38,34 @@ input:
pattern: "blast|xml|txt|daa|sam|tsv|paf" pattern: "blast|xml|txt|daa|sam|tsv|paf"
output: output:
- blast:
type: file
description: File containing blastx hits
pattern: "*.{blast}"
- xml:
type: file
description: File containing blastx hits
pattern: "*.{xml}"
- txt: - txt:
type: file type: file
description: File containing blastx hits description: File containing hits in tabular BLAST format.
pattern: "*.{blastx.txt}" pattern: "*.{txt}"
- daa:
type: file
description: File containing hits in DAA format
pattern: "*.{daa}"
- sam:
type: file
description: File containing aligned reads in SAM format
pattern: "*.{sam}"
- tsv:
type: file
description: Tab separated file containing taxonomic classification of hits
pattern: "*.{tsv}"
- paf:
type: file
description: File containing aligned reads in pairwise mapping format
pattern: "*.{paf}"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions

View file

@ -10,9 +10,10 @@ workflow test_diamond_blastp {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
outext = 'txt' outext = 'txt'
blast_columns = 'qseqid qlen'
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext ) DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext, blast_columns )
} }
workflow test_diamond_blastp_daa { workflow test_diamond_blastp_daa {
@ -20,7 +21,8 @@ workflow test_diamond_blastp_daa {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
outext = 'daa' outext = 'daa'
blast_columns = []
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext ) DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext, blast_columns )
} }

View file

@ -10,9 +10,10 @@ workflow test_diamond_blastx {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'txt' outext = 'txt'
blast_columns = 'qseqid qlen'
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext ) DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext, blast_columns )
} }
workflow test_diamond_blastx_daa { workflow test_diamond_blastx_daa {
@ -20,7 +21,8 @@ workflow test_diamond_blastx_daa {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'daa' outext = 'daa'
blast_columns = []
DIAMOND_MAKEDB ( db ) DIAMOND_MAKEDB ( db )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext ) DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext, blast_columns )
} }