Merge pull request #1587 from jfy133/diamond-update

Update DIAMOND/blast* to support all output formats
2024-12-31 03:32:10 -05:00 · 2022-04-29 15:08:18 +02:00 · 2022-04-29 15:08:18 +02:00 · fa8edd917f
commit fa8edd917f
parent 0ec3dea37f 7de207f93f
8 changed files with 98 additions and 15 deletions
--- a/modules/diamond/blastp/main.nf
+++ b/modules/diamond/blastp/main.nf
@ -11,10 +11,11 @@ process DIAMOND_BLASTP {

    input:
    tuple val(meta), path(fasta)
-    path  db
+    path db
+    val outext

    output:
-    tuple val(meta), path('*.txt'), emit: txt
+    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
    path "versions.yml"           , emit: versions

    when:
@ -23,6 +24,15 @@ process DIAMOND_BLASTP {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
+    switch ( outext ) {
+        case "blast": outfmt = 0; break
+        case "xml": outfmt = 5; break
+        case "txt": outfmt = 6; break
+        case "daa": outfmt = 100; break
+        case "sam": outfmt = 101; break
+        case "tsv": outfmt = 102; break
+        case "paf": outfmt = 103; break
+    }
    """
    DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`

@ -31,8 +41,9 @@ process DIAMOND_BLASTP {
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
+        --outfmt ${outfmt} \\
        $args \\
-        --out ${prefix}.txt
+        --out ${prefix}.${outext}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/diamond/blastp/meta.yml
+++ b/modules/diamond/blastp/meta.yml
@ -28,6 +28,14 @@ input:
      type: directory
      description: Directory containing the protein blast database
      pattern: "*"
+  - outext:
+      type: string
+      description: |
+        Specify the type of output file to be generated. `blast` corresponds to
+        BLAST pairwise format. `xml` corresponds to BLAST xml format.
+        `txt` corresponds to BLAST tabular format. `tsv` corresponds to
+        taxonomic classification format.
+      pattern: "blast|xml|txt|daa|sam|tsv|paf"

 output:
  - txt:
@ -41,3 +49,4 @@ output:

 authors:
  - "@spficklin"
+  - "@jfy133"
--- a/modules/diamond/blastx/main.nf
+++ b/modules/diamond/blastx/main.nf
@ -11,10 +11,11 @@ process DIAMOND_BLASTX {

    input:
    tuple val(meta), path(fasta)
-    path  db
+    path db
+    val outext

    output:
-    tuple val(meta), path('*.txt'), emit: txt
+    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
    path "versions.yml"           , emit: versions

    when:
@ -23,6 +24,15 @@ process DIAMOND_BLASTX {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
+    switch ( outext ) {
+        case "blast": outfmt = 0; break
+        case "xml": outfmt = 5; break
+        case "txt": outfmt = 6; break
+        case "daa": outfmt = 100; break
+        case "sam": outfmt = 101; break
+        case "tsv": outfmt = 102; break
+        case "paf": outfmt = 103; break
+    }
    """
    DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`

@ -31,8 +41,9 @@ process DIAMOND_BLASTX {
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
+        --outfmt ${outfmt} \\
        $args \\
-        --out ${prefix}.txt
+        --out ${prefix}.${outext}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/diamond/blastx/meta.yml
+++ b/modules/diamond/blastx/meta.yml
@ -28,6 +28,14 @@ input:
      type: directory
      description: Directory containing the nucelotide blast database
      pattern: "*"
+  - outext:
+      type: string
+      description: |
+        Specify the type of output file to be generated. `blast` corresponds to
+        BLAST pairwise format. `xml` corresponds to BLAST xml format.
+        `txt` corresponds to BLAST tabular format. `tsv` corresponds to
+        taxonomic classification format.
+      pattern: "blast|xml|txt|daa|sam|tsv|paf"

 output:
  - txt:
@ -41,3 +49,4 @@ output:

 authors:
  - "@spficklin"
+  - "@jfy133"
--- a/tests/modules/diamond/blastp/main.nf
+++ b/tests/modules/diamond/blastp/main.nf
@ -9,7 +9,18 @@ workflow test_diamond_blastp {

    db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
+    outext = 'txt'

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+}
+
+workflow test_diamond_blastp_daa {
+
+    db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
+    outext = 'daa'
+
+    DIAMOND_MAKEDB ( db )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
 }
--- a/tests/modules/diamond/blastp/test.yml
+++ b/tests/modules/diamond/blastp/test.yml
@ -1,8 +1,19 @@
- name: diamond blastp
-  command: nextflow run ./tests/modules/diamond/blastp -entry test_diamond_blastp -c ./tests/config/nextflow.config -c ./tests/modules/diamond/blastp/nextflow.config
+- name: diamond blastp test_diamond_blastp
+  command: nextflow run tests/modules/diamond/blastp -entry test_diamond_blastp -c tests/config/nextflow.config
  tags:
    - diamond
    - diamond/blastp
  files:
-    - path: ./output/diamond/test.diamond_blastp.txt
+    - path: output/diamond/test.diamond_blastp.txt
      md5sum: 3ca7f6290c1d8741c573370e6f8b4db0
+    - path: output/diamond/versions.yml
+
+- name: diamond blastp test_diamond_blastp_daa
+  command: nextflow run tests/modules/diamond/blastp -entry test_diamond_blastp_daa -c tests/config/nextflow.config
+  tags:
+    - diamond
+    - diamond/blastp
+  files:
+    - path: output/diamond/test.diamond_blastp.daa
+      md5sum: d4a79ad1fcb2ec69460e5a09a9468db7
+    - path: output/diamond/versions.yml
--- a/tests/modules/diamond/blastx/main.nf
+++ b/tests/modules/diamond/blastx/main.nf
@ -9,7 +9,18 @@ workflow test_diamond_blastx {

    db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
+    outext = 'txt'

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+}
+
+workflow test_diamond_blastx_daa {
+
+    db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
+    outext = 'daa'
+
+    DIAMOND_MAKEDB ( db )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
 }
--- a/tests/modules/diamond/blastx/test.yml
+++ b/tests/modules/diamond/blastx/test.yml
@ -1,8 +1,18 @@
- name: diamond blastx
-  command: nextflow run ./tests/modules/diamond/blastx -entry test_diamond_blastx -c ./tests/config/nextflow.config -c ./tests/modules/diamond/blastx/nextflow.config
+- name: diamond blastx test_diamond_blastx
+  command: nextflow run tests/modules/diamond/blastx -entry test_diamond_blastx -c tests/config/nextflow.config
  tags:
    - diamond
    - diamond/blastx
  files:
-    - path: ./output/diamond/test.diamond_blastx.txt
-      md5sum: d41d8cd98f00b204e9800998ecf8427e
+    - path: output/diamond/test.diamond_blastx.txt
+    - path: output/diamond/versions.yml
+
+- name: diamond blastx test_diamond_blastx_daa
+  command: nextflow run tests/modules/diamond/blastx -entry test_diamond_blastx_daa -c tests/config/nextflow.config
+  tags:
+    - diamond
+    - diamond/blastx
+  files:
+    - path: output/diamond/test.diamond_blastx.daa
+      md5sum: 2a0ce0f7e01dcead828b87d5cbaccf7a
+    - path: output/diamond/versions.yml