New module: Vsearch sintax (#2094)

* Add vsearch sintax process

* Add tests for vsearch sintax

* Add vsearch sintax process

* Add tests for vsearch sintax

* Update format with prettier

* Remove trailing whitespace

Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com>
This commit is contained in:
Jeanette Tångrot 2022-09-26 15:45:01 +02:00 committed by GitHub
parent 29568f72ca
commit 32ad1746d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 107 additions and 0 deletions

View file

@ -0,0 +1,38 @@
// Runs `vsearch --sintax` on a query sequence file against a reference
// database and writes the result to a tab-separated file.
process VSEARCH_SINTAX {
    tag "${meta.id}"
    label 'process_low'

    // Software environment: vsearch 2.21.1, from conda or a matching container image.
    conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--h95f258a_0':
        'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }"

    input:
    // meta: map whose `id` is used for the task tag and the default output prefix
    // queryfasta: file handed to `--sintax`
    tuple val(meta), path(queryfasta)
    // db: file handed to `--db`
    path db

    output:
    // The TSV output is declared optional, so a missing file does not fail the task.
    tuple val(meta), path('*.tsv'), optional: true, emit: tsv
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output file prefix can be overridden via task.ext.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    vsearch \\
        --sintax $queryfasta \\
        --db $db \\
        --threads $task.cpus \\
        $args \\
        --tabbedout ${prefix}.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,39 @@
# Metadata describing the vsearch_sintax module: the wrapped tool, its
# inputs and outputs, and the module authors.
name: "vsearch_sintax"
description: Taxonomic classification using the sintax algorithm.
keywords:
  - vsearch
  - sintax
tools:
  - "vsearch":
      description: "VSEARCH is a versatile open-source tool for microbiome analysis, including chimera detection, clustering, dereplication and rereplication, extraction, FASTA/FASTQ/SFF file processing, masking, orienting, pair-wise alignment, restriction site cutting, searching, shuffling, sorting, subsampling, and taxonomic classification of amplicon sequences for metagenomics, genomics, and population genetics. (USEARCH alternative)"
      homepage: "https://github.com/torognes/vsearch"
      # NOTE(review): placeholder value; replace with a manual URL once confirmed.
      documentation: "None"
      tool_dev_url: "https://github.com/torognes/vsearch"
      # Bare DOI; the key already says what the value is, so no "doi:" prefix.
      doi: "10.7717/peerj.2584"
      # A real YAML list of licence strings rather than a stringified list.
      licence: ["GPL v3-or-later OR BSD-2-clause"]
input:
  - meta:
      type: map
      description: Groovy Map containing query file information e.g. [ id:'test' ]
  - queryfasta:
      type: file
      description: Query sequences in FASTA or FASTQ format
      pattern: "*.{fasta,fa,fna,faa,fastq,fq}"
  - db:
      type: file
      description: Reference database file in FASTA or UDB format
      pattern: "*"
output:
  - tsv:
      type: file
      description: Results written to tab-delimited file
      # Plain glob; a brace group with a single alternative adds nothing.
      pattern: "*.tsv"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@jtangrot"

View file

@ -2452,6 +2452,10 @@ vcftools:
- modules/vcftools/**
- tests/modules/vcftools/**
# Paths belonging to the vsearch/sintax module: its sources and its tests.
vsearch/sintax:
- modules/vsearch/sintax/**
- tests/modules/vsearch/sintax/**
# Paths belonging to the vsearch/usearchglobal module: its sources and its tests.
vsearch/usearchglobal:
- modules/vsearch/usearchglobal/**
- tests/modules/vsearch/usearchglobal/**

View file

@ -0,0 +1,13 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { VSEARCH_SINTAX } from '../../../../modules/vsearch/sintax/main.nf'
// Test workflow: runs VSEARCH_SINTAX with the sarscov2 genome FASTA as the
// query and the sarscov2 transcriptome FASTA as the database.
workflow test_vsearch_sintax {

    // Both files must exist, otherwise `file(...)` fails early.
    query_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    sintax_db   = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)

    // First process input: [ meta map, query file ]
    input = [ [id:'test'], query_fasta ]

    VSEARCH_SINTAX ( input, sintax_db )
}

View file

@ -0,0 +1,5 @@
// Process-scope settings for this test.
process {
    // Publish into a sub-directory of params.outdir named after the process:
    // take the last ':'-separated segment of the process name, then the part
    // before its first '_', lower-cased.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        "${params.outdir}/${simpleName.tokenize('_')[0].toLowerCase()}"
    }
}

View file

@ -0,0 +1,8 @@
# Test case: runs the test_vsearch_sintax entry workflow and checks the
# resulting table against a fixed checksum.
- name: vsearch sintax test_vsearch_sintax
  command: nextflow run ./tests/modules/vsearch/sintax -entry test_vsearch_sintax -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/sintax/nextflow.config
  tags:
    - vsearch
    - vsearch/sintax
  files:
    # Expected output file and its md5 checksum.
    - path: output/vsearch/test.tsv
      md5sum: 93bc75fb89343e23fbae971df157b14a