diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf
new file mode 100644
index 00000000..65ee62cc
--- /dev/null
+++ b/modules/vsearch/usearchglobal/main.nf
@@ -0,0 +1,46 @@
+// Run `vsearch --usearch_global`: global pairwise alignment of query sequences
+// against a reference database. Hits are written to `<outprefix>.tsv` via
+// `--blast6out`. Additional vsearch options (for example `--id`) are passed
+// through `task.ext.args`.
+
+process VSEARCH_USEARCHGLOBAL {
+    // Double quotes are required here: Groovy does not interpolate single-quoted
+    // strings, so '$queryfasta' would tag every task with that literal text.
+    tag "$queryfasta"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null)
+    // NOTE(review): the Singularity and Docker images reference different build
+    // strings (hf1761c0_1 vs h95f258a_0) -- confirm this is intentional.
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--hf1761c0_1':
+        'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }"
+
+    input:
+    path queryfasta // query sequences, passed to --usearch_global
+    path db         // reference database, passed to --db
+    val outprefix   // basename (without extension) of the result table
+
+    output:
+    path ("*.tsv")      , emit: tsv
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    vsearch \\
+        --usearch_global $queryfasta \\
+        --db $db \\
+        --threads $task.cpus \\
+        $args \\
+        --blast6out ${outprefix}.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/vsearch/usearchglobal/meta.yml b/modules/vsearch/usearchglobal/meta.yml
new file mode 100644
index 00000000..8cb97446
--- /dev/null
+++ b/modules/vsearch/usearchglobal/meta.yml
@@ -0,0 +1,41 @@
+name: "vsearch_usearchglobal"
+description: Compare target sequences to fasta-formatted query sequences using global pairwise alignment.
+keywords:
+  - vsearch
+  - usearch
+  - alignment
+  - fasta
+tools:
+  - "vsearch":
+      description: "VSEARCH is a versatile open-source tool for microbiome analysis, including chimera detection, clustering, dereplication and rereplication, extraction, FASTA/FASTQ/SFF file processing, masking, orienting, pair-wise alignment, restriction site cutting, searching, shuffling, sorting, subsampling, and taxonomic classification of amplicon sequences for metagenomics, genomics, and population genetics. (USEARCH alternative)"
+      homepage: "https://github.com/torognes/vsearch"
+      documentation: "None"
+      tool_dev_url: "https://github.com/torognes/vsearch"
+      doi: "10.7717/peerj.2584"
+      licence: ["GPL v3-or-later OR BSD-2-clause"]
+
+input:
+  - queryfasta:
+      type: file
+      description: Query sequences in FASTA format
+      pattern: "*.{fasta,fa,fna,faa}"
+  - db:
+      type: file
+      description: Reference database file. It may be in FASTA or UDB format.
+      pattern: "*"
+  - outprefix:
+      type: string
+      description: "Prefix of the output file name; results are written to <outprefix>.tsv"
+
+output:
+  - tsv:
+      type: file
+      description: Tab delimited results in blast-like format
+      pattern: "*.tsv"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@jtangrot"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 529f3e02..3ff58b5c 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -2052,6 +2052,10 @@ vcftools:
   - modules/vcftools/**
   - tests/modules/vcftools/**
 
+vsearch/usearchglobal:
+  - modules/vsearch/usearchglobal/**
+  - tests/modules/vsearch/usearchglobal/**
+
 yara/index:
   - modules/yara/index/**
   - tests/modules/yara/index/**
diff --git a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf
new file mode 100644
index 00000000..02becd21
--- /dev/null
+++ b/tests/modules/vsearch/usearchglobal/main.nf
@@ -0,0 +1,12 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { VSEARCH_USEARCHGLOBAL } from '../../../../modules/vsearch/usearchglobal/main.nf'
+
+workflow test_vsearch_usearchglobal {
+
+    query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)
+    db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    VSEARCH_USEARCHGLOBAL ( query, db, "blast6out_results" )
+}
diff --git a/tests/modules/vsearch/usearchglobal/nextflow.config b/tests/modules/vsearch/usearchglobal/nextflow.config
new file mode 100644
index 00000000..aab29979
--- /dev/null
+++ b/tests/modules/vsearch/usearchglobal/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    ext.args = '--id 0.985'
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml
new file mode 100644
index 00000000..b013519d
--- /dev/null
+++ 
b/tests/modules/vsearch/usearchglobal/test.yml
@@ -0,0 +1,7 @@
+- name: vsearch usearchglobal test_vsearch_usearchglobal
+  command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config
+  tags:
+    - vsearch/usearchglobal
+  files:
+    - path: output/vsearch/blast6out_results.tsv
+      md5sum: 09733131643f1d951321a6e17a35eb8c