From 01d86439f5a25cd6b46006420ad7eb35049f4b27 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 26 May 2022 20:16:45 +0200 Subject: [PATCH] Add idcutoff as input --- modules/vsearch/usearchglobal/main.nf | 53 ++++++++----------- modules/vsearch/usearchglobal/meta.yml | 51 ++++++++++++++++-- tests/modules/vsearch/usearchglobal/main.nf | 7 +-- .../vsearch/usearchglobal/nextflow.config | 1 - tests/modules/vsearch/usearchglobal/test.yml | 8 +-- .../vsearch/usearchglobal/test.yml.ori | 14 ----- 6 files changed, 79 insertions(+), 55 deletions(-) delete mode 100644 tests/modules/vsearch/usearchglobal/test.yml.ori diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf index 0b31842f..ed95b7c3 100644 --- a/modules/vsearch/usearchglobal/main.nf +++ b/modules/vsearch/usearchglobal/main.nf @@ -1,39 +1,31 @@ -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta - process VSEARCH_USEARCHGLOBAL { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--hf1761c0_1': + 'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--h95f258a_0': 'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }" input: - tuple val(meta), path(queryfasta) + val(meta) + path(queryfasta) path db + val idcutoff val outoption val user_columns - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - output: - tuple val(meta), path('*.aln') , optional: true, emit: aln - tuple val(meta), path('*.biom') , optional: true, emit: biom - tuple val(meta), path('*.sam') , optional: true, emit: sam - tuple val(meta), path('*.tsv') , optional: true, emit: tsv - tuple val(meta), path('*.uc') , optional: true, emit: uc - path "versions.yml" , emit: versions + tuple val(meta), path('*.aln') , optional: true, emit: aln + tuple val(meta), path('*.biom') , optional: true, emit: biom + tuple val(meta), path('*.lca') , optional: true, emit: lca + tuple val(meta), path('*.mothur') , optional: true, emit: mothur + tuple val(meta), path('*.otu') , optional: true, emit: otu + tuple val(meta), path('*.sam') , optional: true, emit: sam + tuple val(meta), path('*.tsv') , optional: true, emit: tsv + tuple val(meta), path('*.txt') , optional: true, emit: txt + tuple val(meta), path('*.uc') , optional: true, emit: uc + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -45,13 +37,13 @@ process VSEARCH_USEARCHGLOBAL { switch ( outoption ) { case "alnout": outfmt = "--alnout"; out_ext = 'aln'; break case "biomout": outfmt = "--biomout"; out_ext = 'biom'; break - case "blast6out": outfmt = "--blast6out"; out_ext = 'blast6out.tsv'; break - case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur.tsv'; break - case "otutabout": outfmt = "--otutabout"; out_ext = 'otu.tsv'; break - case "samout": outfmt = "--samout"; out_ext = 'sam'; break - case "uc": outfmt = "--uc"; out_ext = 'uc'; break - case "userout": outfmt = "--userout"; out_ext = 'user.tsv'; break - case "lcaout": outfmt = "--lcaout"; out_ext = 'lca.tsv'; break + case "blast6out": outfmt = "--blast6out"; out_ext = 'txt'; break + case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur'; break + case "otutabout": outfmt = "--otutabout"; out_ext = 'otu'; break + case "samout": outfmt = "--samout"; out_ext = 'sam'; break + case "uc": outfmt = "--uc"; out_ext = 'uc'; break + case "userout": outfmt = "--userout"; out_ext = 'tsv'; break + case "lcaout": outfmt = "--lcaout"; out_ext = 'lca'; break default: outfmt = "--alnout"; out_ext = 'aln'; @@ -62,6 +54,7 @@ process VSEARCH_USEARCHGLOBAL { vsearch \\ --usearch_global $queryfasta \\ --db $db \\ + --id $idcutoff \\ --threads $task.cpus \\ $args \\ ${columns} \\ diff --git a/modules/vsearch/usearchglobal/meta.yml b/modules/vsearch/usearchglobal/meta.yml index 8cb97446..848fb65a 100644 --- a/modules/vsearch/usearchglobal/meta.yml +++ b/modules/vsearch/usearchglobal/meta.yml @@ -15,20 +15,65 @@ tools: licence: "['GPL v3-or-later OR BSD-2-clause']" input: + - meta: + type: map + description: Groovy Map containing sample information e.g. [ id:'test' ] - queryfasta: type: file description: Query sequences in FASTA format pattern: "*.{fasta,fa,fna,faa}" - db: type: file - description: Reference database file. It may be in FASTA or UDB format. + description: Reference database file in FASTA or UDB format pattern: "*" + - idcutoff: + type: real + description: Reject the sequence match if the pairwise identity is lower than the given id cutoff value (value ranging from 0.0 to 1.0 included) + - outoption: + type: string + description: Specify the type of output file to be generated by selecting one of the vsearch output file options + pattern: "alnout|biomout|blast6out|mothur_shared_out|otutabout|samout|uc|userout|lcaout" + - user_columns: + type: string + description: If using the `userout` option, specify which columns to include in output, with fields separated with `+` (e.g. query+target+id). See USEARCH manual for valid options. For other output options, use an empty string. output: - - blast6out: + - aln: type: file - description: Tab delimited results in blast-like format + description: Results in pairwise alignment format + pattern: "*.{aln}" + - biom: + type: file + description: Results in an OTU table in the biom version 1.0 file format + pattern: "*.{biom}" + - lca: + type: file + description: Last common ancestor (LCA) information about the hits of each query in tab-separated format + pattern: "*.{lca}" + - mothur: + type: file + description: Results in an OTU table in the mothur ’shared’ tab-separated plain text file format + pattern: "*.{mothur}" + - otu: + type: file + description: Results in an OTU table in the classic tab-separated plain text format + pattern: "*.{otu}" + - sam: + type: file + description: Results written in sam format + pattern: "*.{sam}" + - tsv: + type: file + description: Results in tab-separated output, columns defined by user pattern: "*.{tsv}" + - txt: + type: file + description: Tab delimited results in blast-like tabular format + pattern: "*.{txt}" + - uc: + type: file + description: Tab delimited results in a uclust-like format with 10 columns + pattern: "*.{uc}" - versions: type: file description: File containing software versions diff --git a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf index cdf0fb2f..f4afec32 100644 --- a/tests/modules/vsearch/usearchglobal/main.nf +++ b/tests/modules/vsearch/usearchglobal/main.nf @@ -8,17 +8,18 @@ workflow test_vsearch_usearchglobal { query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - + idcutoff = 0.985 outoption = "xcfert" // Nonsense text to check default case. columns = "" - VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) + VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns ) } workflow test_vsearch_usearchglobal_userout { query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + idcutoff = 0.985 outoption = "userout" columns = "query+target+id" - VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) + VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns ) } diff --git a/tests/modules/vsearch/usearchglobal/nextflow.config b/tests/modules/vsearch/usearchglobal/nextflow.config index 3148a59e..14f46dea 100644 --- a/tests/modules/vsearch/usearchglobal/nextflow.config +++ b/tests/modules/vsearch/usearchglobal/nextflow.config @@ -1,5 +1,4 @@ process { - ext.args = '--id 0.985' publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } } diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml index b7ebec4a..79bcd50e 100644 --- a/tests/modules/vsearch/usearchglobal/test.yml +++ b/tests/modules/vsearch/usearchglobal/test.yml @@ -1,17 +1,17 @@ - name: vsearch usearchglobal test_vsearch_usearchglobal command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config tags: - - vsearch - vsearch/usearchglobal + - vsearch files: - path: output/vsearch/test.aln - md5sum: 7b7479c16e0ecb503913da8bde48d6c5 + md5sum: 2543c4147530dcb1ba4550d3fdb1502a - name: vsearch usearchglobal test_vsearch_usearchglobal_userout command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal_userout -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config tags: - - vsearch - vsearch/usearchglobal + - vsearch files: - - path: output/vsearch/test.user.tsv + - path: output/vsearch/test.tsv md5sum: b6cc50f7c8d18cb82e74dab70ed4baab diff --git a/tests/modules/vsearch/usearchglobal/test.yml.ori b/tests/modules/vsearch/usearchglobal/test.yml.ori deleted file mode 100644 index df644165..00000000 --- a/tests/modules/vsearch/usearchglobal/test.yml.ori +++ /dev/null @@ -1,14 +0,0 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml vsearch/usearchglobal -- name: "vsearch usearchglobal" - command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config - tags: - - "vsearch" - # - - "vsearch/usearchglobal" - # - files: - - path: "output/vsearch/blast6out_results.tsv" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/vsearch/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b