diff --git a/modules/diamond/blastp/main.nf b/modules/diamond/blastp/main.nf
index ccd455f4..033186ea 100644
--- a/modules/diamond/blastp/main.nf
+++ b/modules/diamond/blastp/main.nf
@@ -10,11 +10,18 @@ process DIAMOND_BLASTP {
     input:
     tuple val(meta), path(fasta)
     path db
-    val outext
+    val out_ext
+    val blast_columns
 
     output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml"                                       , emit: versions
+    tuple val(meta), path('*.blast'), optional: true, emit: blast
+    tuple val(meta), path('*.xml')  , optional: true, emit: xml
+    tuple val(meta), path('*.txt')  , optional: true, emit: txt
+    tuple val(meta), path('*.daa')  , optional: true, emit: daa
+    tuple val(meta), path('*.sam')  , optional: true, emit: sam
+    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
+    tuple val(meta), path('*.paf')  , optional: true, emit: paf
+    path "versions.yml"             , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -22,7 +29,8 @@ process DIAMOND_BLASTP {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    switch ( outext ) {
+    def columns = blast_columns ? "${blast_columns}" : ''
+    switch ( out_ext ) {
         case "blast": outfmt = 0; break
         case "xml": outfmt = 5; break
         case "txt": outfmt = 6; break
@@ -30,6 +38,12 @@ process DIAMOND_BLASTP {
         case "sam": outfmt = 101; break
         case "tsv": outfmt = 102; break
         case "paf": outfmt = 103; break
+        default:
+            // Warn before out_ext is overwritten so the message reports the value the caller supplied.
+            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
+            outfmt = 6;
+            out_ext = 'txt';
+            break
     }
     """
     DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@@ -39,9 +53,9 @@ process DIAMOND_BLASTP {
         --threads $task.cpus \\
         --db \$DB \\
         --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
         $args \\
-        --out ${prefix}.${outext}
+        --out ${prefix}.${out_ext}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/diamond/blastp/meta.yml b/modules/diamond/blastp/meta.yml
index 1aa36c23..3aa81e53 100644
--- a/modules/diamond/blastp/meta.yml
+++ b/modules/diamond/blastp/meta.yml
@@ -28,7 +28,7 @@ input:
       type: directory
       description: Directory containing the protein blast database
       pattern: "*"
-  - outext:
+  - out_ext:
       type: string
       description: |
         Specify the type of output file to be generated. `blast` corresponds to
@@ -36,12 +36,42 @@ input:
         `txt` corresponds to to BLAST tabular format. `tsv` corresponds to
         taxonomic classification format.
       pattern: "blast|xml|txt|daa|sam|tsv|paf"
+  - blast_columns:
+      type: string
+      description: |
+        Optional space separated list of DIAMOND tabular BLAST output keywords
+        used in conjunction with the 'txt' out_ext option (--outfmt 6). See
+        DIAMOND documentation for more information.
 
 output:
-  - txt:
+  - blast:
       type: file
       description: File containing blastp hits
-      pattern: "*.{blastp.txt}"
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastp hits
+      pattern: "*.{xml}"
+  - txt:
+      type: file
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format
+      pattern: "*.{paf}"
   - versions:
       type: file
       description: File containing software versions
diff --git a/modules/diamond/blastx/main.nf b/modules/diamond/blastx/main.nf
index 357427eb..d3272279 100644
--- a/modules/diamond/blastx/main.nf
+++ b/modules/diamond/blastx/main.nf
@@ -10,11 +10,18 @@ process DIAMOND_BLASTX {
     input:
     tuple val(meta), path(fasta)
     path db
-    val outext
+    val out_ext
+    val blast_columns
 
     output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml"                                       , emit: versions
+    tuple val(meta), path('*.blast'), optional: true, emit: blast
+    tuple val(meta), path('*.xml')  , optional: true, emit: xml
+    tuple val(meta), path('*.txt')  , optional: true, emit: txt
+    tuple val(meta), path('*.daa')  , optional: true, emit: daa
+    tuple val(meta), path('*.sam')  , optional: true, emit: sam
+    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
+    tuple val(meta), path('*.paf')  , optional: true, emit: paf
+    path "versions.yml"             , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -22,7 +29,8 @@ process DIAMOND_BLASTX {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    switch ( outext ) {
+    def columns = blast_columns ? "${blast_columns}" : ''
+    switch ( out_ext ) {
         case "blast": outfmt = 0; break
         case "xml": outfmt = 5; break
         case "txt": outfmt = 6; break
@@ -30,6 +38,12 @@ process DIAMOND_BLASTX {
         case "sam": outfmt = 101; break
         case "tsv": outfmt = 102; break
         case "paf": outfmt = 103; break
+        default:
+            // Warn before out_ext is overwritten so the message reports the value the caller supplied.
+            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
+            outfmt = 6;
+            out_ext = 'txt';
+            break
     }
     """
     DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@@ -39,9 +53,9 @@ process DIAMOND_BLASTX {
         --threads $task.cpus \\
         --db \$DB \\
         --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
         $args \\
-        --out ${prefix}.${outext}
+        --out ${prefix}.${out_ext}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/diamond/blastx/meta.yml b/modules/diamond/blastx/meta.yml
index 5ee2d55e..2dcd7bc6 100644
--- a/modules/diamond/blastx/meta.yml
+++ b/modules/diamond/blastx/meta.yml
@@ -28,7 +28,7 @@ input:
       type: directory
       description: Directory containing the nucelotide blast database
       pattern: "*"
-  - outext:
+  - out_ext:
       type: string
       description: |
         Specify the type of output file to be generated. `blast` corresponds to
@@ -38,10 +38,40 @@ input:
       pattern: "blast|xml|txt|daa|sam|tsv|paf"
+  - blast_columns:
+      type: string
+      description: |
+        Optional space separated list of DIAMOND tabular BLAST output keywords
+        used in conjunction with the 'txt' out_ext option (--outfmt 6). See
+        DIAMOND documentation for more information.
 
 output:
+  - blast:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{xml}"
   - txt:
       type: file
-      description: File containing blastx hits
-      pattern: "*.{blastx.txt}"
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format
+      pattern: "*.{paf}"
   - versions:
       type: file
       description: File containing software versions
diff --git a/tests/modules/diamond/blastp/main.nf b/tests/modules/diamond/blastp/main.nf
index 80ea2ec5..ff669233 100644
--- a/tests/modules/diamond/blastp/main.nf
+++ b/tests/modules/diamond/blastp/main.nf
@@ -9,18 +9,20 @@ workflow test_diamond_blastp {
 
     db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
     fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
-    outext = 'txt'
+    out_ext = 'txt'
+    blast_columns = 'qseqid qlen'
 
     DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
 }
 
 workflow test_diamond_blastp_daa {
 
     db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
     fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
-    outext = 'daa'
+    out_ext = 'daa'
+    blast_columns = []
 
     DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
 }
diff --git a/tests/modules/diamond/blastp/test.yml b/tests/modules/diamond/blastp/test.yml
index c2b8b6f5..aff4e1c5 100644
--- a/tests/modules/diamond/blastp/test.yml
+++ b/tests/modules/diamond/blastp/test.yml
@@ -5,7 +5,6 @@
     - diamond
   files:
     - path: output/diamond/test.diamond_blastp.txt
-      md5sum: 2515cf88590afa32356497e79a51fce9
     - path: output/diamond/versions.yml
 
 - name: diamond blastp test_diamond_blastp_daa
@@ -15,5 +14,4 @@
     - diamond
   files:
     - path: output/diamond/test.diamond_blastp.daa
-      md5sum: 0b539c68a5b66dd6e20ad5d218f4f4c6
     - path: output/diamond/versions.yml
diff --git a/tests/modules/diamond/blastx/main.nf b/tests/modules/diamond/blastx/main.nf
index d5949762..8316aa91 100644
--- a/tests/modules/diamond/blastx/main.nf
+++ b/tests/modules/diamond/blastx/main.nf
@@ -9,18 +9,20 @@ workflow test_diamond_blastx {
 
     db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
     fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
-    outext = 'txt'
+    out_ext = 'tfdfdt' // Nonsense file extension to check default case.
+    blast_columns = 'qseqid qlen'
 
     DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
 }
 
 workflow test_diamond_blastx_daa {
 
     db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
     fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
-    outext = 'daa'
+    out_ext = 'daa'
+    blast_columns = []
 
     DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
 }
diff --git a/tests/modules/diamond/blastx/test.yml b/tests/modules/diamond/blastx/test.yml
index 9c30ba25..b2b6149f 100644
--- a/tests/modules/diamond/blastx/test.yml
+++ b/tests/modules/diamond/blastx/test.yml
@@ -5,7 +5,6 @@
     - diamond/blastx
   files:
     - path: output/diamond/test.diamond_blastx.txt
-      md5sum: eb2aebfa1cb42fcb2121c65528663307
     - path: output/diamond/versions.yml
 
 - name: diamond blastx test_diamond_blastx_daa