From 01d86439f5a25cd6b46006420ad7eb35049f4b27 Mon Sep 17 00:00:00 2001
From: jtangrot <jeanette.tangrot@umu.se>
Date: Thu, 26 May 2022 20:16:45 +0200
Subject: [PATCH] Add idcutoff as input

---
 modules/vsearch/usearchglobal/main.nf         | 53 ++++++++-----------
 modules/vsearch/usearchglobal/meta.yml        | 51 ++++++++++++++++--
 tests/modules/vsearch/usearchglobal/main.nf   |  7 +--
 .../vsearch/usearchglobal/nextflow.config     |  1 -
 tests/modules/vsearch/usearchglobal/test.yml  |  8 +--
 .../vsearch/usearchglobal/test.yml.ori        | 14 -----
 6 files changed, 79 insertions(+), 55 deletions(-)
 delete mode 100644 tests/modules/vsearch/usearchglobal/test.yml.ori

diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf
index 0b31842f..ed95b7c3 100644
--- a/modules/vsearch/usearchglobal/main.nf
+++ b/modules/vsearch/usearchglobal/main.nf
@@ -1,39 +1,31 @@
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-//               All other parameters MUST be provided using the "task.ext" directive, see here:
-//               https://www.nextflow.io/docs/latest/process.html#ext
-//               where "task.ext" is a string.
-//               Any parameters that need to be evaluated in the context of a particular sample
-//               e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-//               unless there is a run-time, storage advantage in implementing in this way
-//               e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//                 bwa mem | samtools view -B -T ref.fasta
-
 process VSEARCH_USEARCHGLOBAL {
-    tag "$meta.id"
+    tag "${meta.id}"
     label 'process_low'
 
     conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--hf1761c0_1':
+        'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--h95f258a_0':
         'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }"
 
     input:
-    tuple val(meta), path(queryfasta)
+    val(meta)
+    path(queryfasta)
     path db
+    val idcutoff
     val outoption
     val user_columns
 
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-
     output:
-    tuple val(meta), path('*.aln')  , optional: true, emit: aln
-    tuple val(meta), path('*.biom') , optional: true, emit: biom
-    tuple val(meta), path('*.sam')  , optional: true, emit: sam
-    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
-    tuple val(meta), path('*.uc')   , optional: true, emit: uc
-    path "versions.yml"                             , emit: versions
+    tuple val(meta), path('*.aln')    , optional: true, emit: aln
+    tuple val(meta), path('*.biom')   , optional: true, emit: biom
+    tuple val(meta), path('*.lca')    , optional: true, emit: lca
+    tuple val(meta), path('*.mothur') , optional: true, emit: mothur
+    tuple val(meta), path('*.otu')    , optional: true, emit: otu
+    tuple val(meta), path('*.sam')    , optional: true, emit: sam
+    tuple val(meta), path('*.tsv')    , optional: true, emit: tsv
+    tuple val(meta), path('*.txt')    , optional: true, emit: txt
+    tuple val(meta), path('*.uc')     , optional: true, emit: uc
+    path "versions.yml"                               , emit: versions
     
     when:
     task.ext.when == null || task.ext.when
@@ -45,13 +37,13 @@ process VSEARCH_USEARCHGLOBAL {
     switch ( outoption ) {
         case "alnout": outfmt = "--alnout"; out_ext = 'aln'; break
         case "biomout": outfmt = "--biomout"; out_ext = 'biom'; break
-	case "blast6out": outfmt = "--blast6out"; out_ext = 'blast6out.tsv'; break
-        case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur.tsv'; break
-	case "otutabout": outfmt = "--otutabout"; out_ext = 'otu.tsv'; break
-	case "samout": outfmt = "--samout"; out_ext = 'sam'; break
-	case "uc": outfmt = "--uc"; out_ext = 'uc'; break
-	case "userout": outfmt = "--userout"; out_ext = 'user.tsv'; break
-	case "lcaout": outfmt = "--lcaout"; out_ext = 'lca.tsv'; break
+        case "blast6out": outfmt = "--blast6out"; out_ext = 'txt'; break
+        case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur'; break
+        case "otutabout": outfmt = "--otutabout"; out_ext = 'otu'; break
+        case "samout": outfmt = "--samout"; out_ext = 'sam'; break
+        case "uc": outfmt = "--uc"; out_ext = 'uc'; break
+        case "userout": outfmt = "--userout"; out_ext = 'tsv'; break
+        case "lcaout": outfmt = "--lcaout"; out_ext = 'lca'; break
         default:
             outfmt = "--alnout";
             out_ext = 'aln';
@@ -62,6 +54,7 @@ process VSEARCH_USEARCHGLOBAL {
     vsearch \\
         --usearch_global $queryfasta \\
         --db $db \\
+        --id $idcutoff \\
         --threads $task.cpus \\
         $args \\
         ${columns} \\
diff --git a/modules/vsearch/usearchglobal/meta.yml b/modules/vsearch/usearchglobal/meta.yml
index 8cb97446..848fb65a 100644
--- a/modules/vsearch/usearchglobal/meta.yml
+++ b/modules/vsearch/usearchglobal/meta.yml
@@ -15,20 +15,65 @@ tools:
       licence: "['GPL v3-or-later OR BSD-2-clause']"
 
 input:
+  - meta:
+      type: map
+      description: Groovy Map containing sample information e.g. [ id:'test' ]
   - queryfasta:
       type: file
       description: Query sequences in FASTA format
       pattern: "*.{fasta,fa,fna,faa}"
   - db:
       type: file
-      description: Reference database file. It may be in FASTA or UDB format.
+      description: Reference database file in FASTA or UDB format
       pattern: "*"
+  - idcutoff:
+      type: float
+      description: Reject the sequence match if the pairwise identity is lower than the given id cutoff value (value between 0.0 and 1.0, inclusive)
+  - outoption:
+      type: string
+      description: Specify the type of output file to be generated by selecting one of the vsearch output file options
+      pattern: "alnout|biomout|blast6out|mothur_shared_out|otutabout|samout|uc|userout|lcaout"
+  - user_columns:
+      type: string
+      description: If using the `userout` option, specify which columns to include in output, with fields separated with `+` (e.g. query+target+id). See USEARCH manual for valid options. For other output options, use an empty string.
 
 output:
-  - blast6out:
+  - aln:
       type: file
-      description: Tab delimited results in blast-like format
+      description: Results in pairwise alignment format
+      pattern: "*.{aln}"
+  - biom:
+      type: file
+      description: Results in an OTU table in the biom version 1.0 file format
+      pattern: "*.{biom}"
+  - lca:
+      type: file
+      description: Last common ancestor (LCA) information about the hits of each query in tab-separated format
+      pattern: "*.{lca}"
+  - mothur:
+      type: file
+      description: Results in an OTU table in the mothur 'shared' tab-separated plain text file format
+      pattern: "*.{mothur}"
+  - otu:
+      type: file
+      description: Results in an OTU table in the classic tab-separated plain text format
+      pattern: "*.{otu}"
+  - sam:
+      type: file
+      description: Results written in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Results in tab-separated output, columns defined by user
       pattern: "*.{tsv}" 
+  - txt:
+      type: file
+      description: Tab delimited results in blast-like tabular format
+      pattern: "*.{txt}"
+  - uc:
+      type: file
+      description: Tab delimited results in a uclust-like format with 10 columns
+      pattern: "*.{uc}"
   - versions:
       type: file
       description: File containing software versions
diff --git a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf
index cdf0fb2f..f4afec32 100644
--- a/tests/modules/vsearch/usearchglobal/main.nf
+++ b/tests/modules/vsearch/usearchglobal/main.nf
@@ -8,17 +8,18 @@ workflow test_vsearch_usearchglobal {
     
     query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)
     db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
-    
+    idcutoff = 0.985
     outoption = "xcfert"  // Nonsense text to check default case.
     columns = "" 
-    VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns )
+    VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns )
 }
 
 workflow test_vsearch_usearchglobal_userout {
     
     query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)
     db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    idcutoff = 0.985
     outoption = "userout"
     columns = "query+target+id" 
-    VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns )
+    VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns )
 }
diff --git a/tests/modules/vsearch/usearchglobal/nextflow.config b/tests/modules/vsearch/usearchglobal/nextflow.config
index 3148a59e..14f46dea 100644
--- a/tests/modules/vsearch/usearchglobal/nextflow.config
+++ b/tests/modules/vsearch/usearchglobal/nextflow.config
@@ -1,5 +1,4 @@
 process {
-    ext.args = '--id 0.985'
     publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
 } 
 
diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml
index b7ebec4a..79bcd50e 100644
--- a/tests/modules/vsearch/usearchglobal/test.yml
+++ b/tests/modules/vsearch/usearchglobal/test.yml
@@ -1,17 +1,17 @@
 - name: vsearch usearchglobal test_vsearch_usearchglobal
   command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config  -c ./tests/modules/vsearch/usearchglobal/nextflow.config
   tags:
-    - vsearch
     - vsearch/usearchglobal
+    - vsearch
   files:
     - path: output/vsearch/test.aln
-      md5sum: 7b7479c16e0ecb503913da8bde48d6c5
+      md5sum: 2543c4147530dcb1ba4550d3fdb1502a
 
 - name: vsearch usearchglobal test_vsearch_usearchglobal_userout
   command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal_userout -c ./tests/config/nextflow.config  -c ./tests/modules/vsearch/usearchglobal/nextflow.config
   tags:
-    - vsearch
     - vsearch/usearchglobal
+    - vsearch
   files:
-    - path: output/vsearch/test.user.tsv
+    - path: output/vsearch/test.tsv
       md5sum: b6cc50f7c8d18cb82e74dab70ed4baab
diff --git a/tests/modules/vsearch/usearchglobal/test.yml.ori b/tests/modules/vsearch/usearchglobal/test.yml.ori
deleted file mode 100644
index df644165..00000000
--- a/tests/modules/vsearch/usearchglobal/test.yml.ori
+++ /dev/null
@@ -1,14 +0,0 @@
-## TODO nf-core: Please run the following command to build this file:
-#                nf-core modules create-test-yml vsearch/usearchglobal
-- name: "vsearch usearchglobal"
-  command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config
-  tags:
-    - "vsearch"
-    #
-    - "vsearch/usearchglobal"
-    #
-  files:
-    - path: "output/vsearch/blast6out_results.tsv"
-      md5sum: e667c7caad0bc4b7ac383fd023c654fc
-    - path: output/vsearch/versions.yml
-      md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b