Fix output channels and allow BLAST table customisation

2024-12-22 02:58:17 +00:00 · 2022-05-02 11:39:07 +02:00 · 2022-05-02 11:39:07 +02:00 · 6986975bc0
commit 6986975bc0
parent 996385fb0f
6 changed files with 88 additions and 14 deletions
--- a/modules/diamond/blastp/main.nf
+++ b/modules/diamond/blastp/main.nf
@ -11,10 +11,17 @@ process DIAMOND_BLASTP {
    tuple val(meta), path(fasta)
    path db
    val outext
+    val blast_columns

    output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml"           , emit: versions
+    tuple val(meta), path('*.{blast}'), optional: true, emit: blast
+    tuple val(meta), path('*.{xml}')  , optional: true, emit: xml
+    tuple val(meta), path('*.{txt}')  , optional: true, emit: txt
+    tuple val(meta), path('*.{daa}')  , optional: true, emit: daa
+    tuple val(meta), path('*.{sam}')  , optional: true, emit: sam
+    tuple val(meta), path('*.{tsv}')  , optional: true, emit: tsv
+    tuple val(meta), path('*.{paf}')  , optional: true, emit: paf
+    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when
@ -22,6 +29,7 @@ process DIAMOND_BLASTP {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
+    def columns = blast_columns ? "${blast_columns}" : ''
    switch ( outext ) {
        case "blast": outfmt = 0; break
        case "xml": outfmt = 5; break
@ -39,7 +47,7 @@ process DIAMOND_BLASTP {
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
        $args \\
        --out ${prefix}.${outext}

--- a/modules/diamond/blastp/meta.yml
+++ b/modules/diamond/blastp/meta.yml
@ -36,12 +36,42 @@ input:
        `txt` corresponds to BLAST tabular format. `tsv` corresponds to
        taxonomic classification format.
      pattern: "blast|xml|txt|daa|sam|tsv|paf"
+  - blast_columns:
+      type: string
+      description: |
+        Optional space-separated list of DIAMOND tabular BLAST output keywords,
+        used in conjunction with the 'txt' outext option (--outfmt 6). See the
+        DIAMOND documentation for more information.

 output:
-  - txt:
+  - blast:
      type: file
      description: File containing blastp hits
-      pattern: "*.{blastp.txt}"
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastp hits in XML format
+      pattern: "*.{xml}"
+  - txt:
+      type: file
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format
+      pattern: "*.{paf}"
  - versions:
      type: file
      description: File containing software versions
--- a/modules/diamond/blastx/main.nf
+++ b/modules/diamond/blastx/main.nf
@ -11,10 +11,17 @@ process DIAMOND_BLASTX {
    tuple val(meta), path(fasta)
    path db
    val outext
+    val blast_columns

    output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml"           , emit: versions
+    tuple val(meta), path('*.{blast}'), optional: true, emit: blast
+    tuple val(meta), path('*.{xml}')  , optional: true, emit: xml
+    tuple val(meta), path('*.{txt}')  , optional: true, emit: txt
+    tuple val(meta), path('*.{daa}')  , optional: true, emit: daa
+    tuple val(meta), path('*.{sam}')  , optional: true, emit: sam
+    tuple val(meta), path('*.{tsv}')  , optional: true, emit: tsv
+    tuple val(meta), path('*.{paf}')  , optional: true, emit: paf
+    path "versions.yml"                               , emit: versions

    when:
    task.ext.when == null || task.ext.when
@ -22,6 +29,7 @@ process DIAMOND_BLASTX {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
+    def columns = blast_columns ? "${blast_columns}" : ''
    switch ( outext ) {
        case "blast": outfmt = 0; break
        case "xml": outfmt = 5; break
@ -39,7 +47,7 @@ process DIAMOND_BLASTX {
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
        $args \\
        --out ${prefix}.${outext}

--- a/modules/diamond/blastx/meta.yml
+++ b/modules/diamond/blastx/meta.yml
@ -38,10 +38,34 @@ input:
      pattern: "blast|xml|txt|daa|sam|tsv|paf"

 output:
+  - blast:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastx hits in XML format
+      pattern: "*.{xml}"
  - txt:
      type: file
-      description: File containing blastx hits
-      pattern: "*.{blastx.txt}"
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format
+      pattern: "*.{paf}"
  - versions:
      type: file
      description: File containing software versions
--- a/tests/modules/diamond/blastp/main.nf
+++ b/tests/modules/diamond/blastp/main.nf
@ -10,9 +10,10 @@ workflow test_diamond_blastp {
    db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    outext = 'txt'
+    blast_columns = 'qseqid qlen'

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext, blast_columns )
 }

 workflow test_diamond_blastp_daa {
@ -20,7 +21,8 @@ workflow test_diamond_blastp_daa {
    db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    outext = 'daa'
+    blast_columns = []

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext, blast_columns )
 }
--- a/tests/modules/diamond/blastx/main.nf
+++ b/tests/modules/diamond/blastx/main.nf
@ -10,9 +10,10 @@ workflow test_diamond_blastx {
    db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
    outext = 'txt'
+    blast_columns = 'qseqid qlen'

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext, blast_columns )
 }

 workflow test_diamond_blastx_daa {
@ -20,7 +21,8 @@ workflow test_diamond_blastx_daa {
    db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
    outext = 'daa'
+    blast_columns = []

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext, blast_columns )
 }