Merge pull request #1595 from jfy133/diamond-update

Standardise DIAMOND output channels and md5sum
2024-12-22 02:58:17 +00:00 · 2022-05-02 13:43:56 +02:00 · 2022-05-02 13:43:56 +02:00 · 92efb83ec8
commit 92efb83ec8
parent 0b01607659 f1c2f624eb
8 changed files with 110 additions and 29 deletions
--- a/modules/diamond/blastp/main.nf
+++ b/modules/diamond/blastp/main.nf
@ -10,11 +10,18 @@ process DIAMOND_BLASTP {
    input:
    tuple val(meta), path(fasta)
    path db
-    val outext
+    val out_ext
+    val blast_columns

    output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml"           , emit: versions
+    tuple val(meta), path('*.blast'), optional: true, emit: blast
+    tuple val(meta), path('*.xml')  , optional: true, emit: xml
+    tuple val(meta), path('*.txt')  , optional: true, emit: txt
+    tuple val(meta), path('*.daa')  , optional: true, emit: daa
+    tuple val(meta), path('*.sam')  , optional: true, emit: sam
+    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
+    tuple val(meta), path('*.paf')  , optional: true, emit: paf
+    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when
@ -22,7 +29,8 @@ process DIAMOND_BLASTP {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
-    switch ( outext ) {
+    def columns = blast_columns ? "${blast_columns}" : ''
+    switch ( out_ext ) {
        case "blast": outfmt = 0; break
        case "xml": outfmt = 5; break
        case "txt": outfmt = 6; break
@ -30,6 +38,11 @@ process DIAMOND_BLASTP {
        case "sam": outfmt = 101; break
        case "tsv": outfmt = 102; break
        case "paf": outfmt = 103; break
+        default:
+            // Warn first: the message must name the rejected value, not the 'txt' fallback assigned below.
+            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
+            outfmt = '6'; out_ext = 'txt';
+            break
    }
    """
    DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -39,9 +52,9 @@ process DIAMOND_BLASTP {
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
        $args \\
-        --out ${prefix}.${outext}
+        --out ${prefix}.${out_ext}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/diamond/blastp/meta.yml
+++ b/modules/diamond/blastp/meta.yml
@ -28,7 +28,7 @@ input:
      type: directory
      description: Directory containing the protein blast database
      pattern: "*"
-  - outext:
+  - out_ext:
      type: string
      description: |
        Specify the type of output file to be generated. `blast` corresponds to
@ -36,12 +36,42 @@ input:
        `txt` corresponds to to BLAST tabular format. `tsv` corresponds to
        taxonomic classification format.
      pattern: "blast|xml|txt|daa|sam|tsv|paf"
+  - blast_columns:
+      type: string
+      description: |
+        Optional space separated list of DIAMOND tabular BLAST output keywords
+        used in conjunction with the 'txt' out_ext option (--outfmt 6). See
+        DIAMOND documentation for more information.

 output:
-  - txt:
+  - blast:
      type: file
      description: File containing blastp hits
-      pattern: "*.{blastp.txt}"
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastp hits
+      pattern: "*.{xml}"
+  - txt:
+      type: file
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format
+      pattern: "*.{paf}"
  - versions:
      type: file
      description: File containing software versions
--- a/modules/diamond/blastx/main.nf
+++ b/modules/diamond/blastx/main.nf
@ -10,11 +10,18 @@ process DIAMOND_BLASTX {
    input:
    tuple val(meta), path(fasta)
    path db
-    val outext
+    val out_ext
+    val blast_columns

    output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml"           , emit: versions
+    tuple val(meta), path('*.blast'), optional: true, emit: blast
+    tuple val(meta), path('*.xml')  , optional: true, emit: xml
+    tuple val(meta), path('*.txt')  , optional: true, emit: txt
+    tuple val(meta), path('*.daa')  , optional: true, emit: daa
+    tuple val(meta), path('*.sam')  , optional: true, emit: sam
+    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
+    tuple val(meta), path('*.paf')  , optional: true, emit: paf
+    path "versions.yml"                               , emit: versions

    when:
    task.ext.when == null || task.ext.when
@ -22,7 +29,8 @@ process DIAMOND_BLASTX {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
-    switch ( outext ) {
+    def columns = blast_columns ? "${blast_columns}" : ''
+    switch ( out_ext ) {
        case "blast": outfmt = 0; break
        case "xml": outfmt = 5; break
        case "txt": outfmt = 6; break
@ -30,6 +38,11 @@ process DIAMOND_BLASTX {
        case "sam": outfmt = 101; break
        case "tsv": outfmt = 102; break
        case "paf": outfmt = 103; break
+        default:
+            // Warn first: the message must name the rejected value, not the 'txt' fallback assigned below.
+            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
+            outfmt = '6'; out_ext = 'txt';
+            break
    }
    """
    DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -39,9 +52,9 @@ process DIAMOND_BLASTX {
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
        $args \\
-        --out ${prefix}.${outext}
+        --out ${prefix}.${out_ext}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/diamond/blastx/meta.yml
+++ b/modules/diamond/blastx/meta.yml
@ -28,7 +28,7 @@ input:
      type: directory
      description: Directory containing the nucelotide blast database
      pattern: "*"
-  - outext:
+  - out_ext:
      type: string
      description: |
        Specify the type of output file to be generated. `blast` corresponds to
@ -38,10 +38,34 @@ input:
      pattern: "blast|xml|txt|daa|sam|tsv|paf"

 output:
+  - blast:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{xml}"
  - txt:
      type: file
-      description: File containing blastx hits
-      pattern: "*.{blastx.txt}"
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format
+      pattern: "*.{paf}"
  - versions:
      type: file
      description: File containing software versions
--- a/tests/modules/diamond/blastp/main.nf
+++ b/tests/modules/diamond/blastp/main.nf
@ -9,18 +9,20 @@ workflow test_diamond_blastp {

    db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
-    outext = 'txt'
+    out_ext = 'txt'
+    blast_columns = 'qseqid qlen'

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
 }

 workflow test_diamond_blastp_daa {

    db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
-    outext = 'daa'
+    out_ext = 'daa'
+    blast_columns = []

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
 }
--- a/tests/modules/diamond/blastp/test.yml
+++ b/tests/modules/diamond/blastp/test.yml
@ -5,7 +5,6 @@
    - diamond
  files:
    - path: output/diamond/test.diamond_blastp.txt
-      md5sum: 2515cf88590afa32356497e79a51fce9
    - path: output/diamond/versions.yml

 - name: diamond blastp test_diamond_blastp_daa
@ -15,5 +14,4 @@
    - diamond
  files:
    - path: output/diamond/test.diamond_blastp.daa
-      md5sum: 0b539c68a5b66dd6e20ad5d218f4f4c6
    - path: output/diamond/versions.yml
--- a/tests/modules/diamond/blastx/main.nf
+++ b/tests/modules/diamond/blastx/main.nf
@ -9,18 +9,20 @@ workflow test_diamond_blastx {

    db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
-    outext = 'txt'
+    out_ext = 'tfdfdt'  // Nonsense file extension to check default case.
+    blast_columns = 'qseqid qlen'

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
 }

 workflow test_diamond_blastx_daa {

    db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
-    outext = 'daa'
+    out_ext = 'daa'
+    blast_columns = []

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
 }
--- a/tests/modules/diamond/blastx/test.yml
+++ b/tests/modules/diamond/blastx/test.yml
@ -5,7 +5,6 @@
    - diamond/blastx
  files:
    - path: output/diamond/test.diamond_blastx.txt
-      md5sum: eb2aebfa1cb42fcb2121c65528663307
    - path: output/diamond/versions.yml

 - name: diamond blastx test_diamond_blastx_daa