Add module: goat/taxonsearch (#1866)

* first commit

* single taxon input

* added .tsv output

* input: single taxon or file with taxon identifiers

* updated input and output

* removed wrong tool description

* added tests

* ext.args = '-l -b'

* fixed wrong input names

* updated test file

* Update modules/goat/taxonsearch/main.nf

simple version output

Co-authored-by: Matthieu Muffato <mm49@sanger.ac.uk>

* removed 'NO_FILE' from input definition

* added ! in if statement

* optional input: empty list

* successful updated test

* added test with file

* remove blank spaces in include {}

* added test with taxa file

Co-authored-by: Matthieu Muffato <mm49@sanger.ac.uk>
This commit is contained in:
Alexander Ramos Díaz 2022-08-18 08:34:58 -06:00 committed by GitHub
parent 454e0acc09
commit 37c6d4a1a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 169 additions and 0 deletions

View file

@ -0,0 +1,36 @@
// Runs `goat-cli taxon search` for either a single taxon or a file of taxa
// and writes the result to <prefix>.tsv.
//
// Inputs (one tuple):
//   meta      - Groovy map with sample information; meta.id is the default prefix
//   taxon     - single taxon identifier (pass '' when a taxa file is used)
//   taxa_file - file with one taxon per line (pass [] when a single taxon is used)
// Outputs:
//   taxonsearch - tuple of meta and the generated *.tsv
//   versions    - versions.yml with the goat-cli version
process GOAT_TAXONSEARCH {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::goat=0.2.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/goat:0.2.0--h92d785c_0':
        'quay.io/biocontainers/goat:0.2.0--h92d785c_0' }"

    input:
    tuple val(meta), val(taxon), path(taxa_file)

    output:
    tuple val(meta), path("*.tsv"), emit: taxonsearch
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Validate first: exactly one of `taxon` / `taxa_file` must be supplied.
    if (!taxon && !taxa_file) error "No input. Valid input: single taxon identifier or a .txt file with identifiers"
    if (taxon && taxa_file ) error "Only one input is required: a single taxon identifier or a .txt file with identifiers"

    // `def` keeps the variable local to this task instead of leaking into the
    // script binding. The taxon is quoted so that names containing spaces
    // (e.g. binomial names) reach goat-cli as a single argument.
    def search_input = taxa_file ? "-f ${taxa_file}" : "-t \"${taxon}\""
    """
    goat-cli taxon search \\
        $args \\
        $search_input > ${prefix}.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        goat: \$(goat-cli --version | cut -d' ' -f2)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,51 @@
# Module metadata for goat/taxonsearch: describes the tool, the channels the
# process consumes, and the channels it emits.
name: "goat_taxonsearch"
description: Query metadata for any taxon across the tree of life.
keywords:
  - public datasets
  - ncbi
  - genomes on a tree
tools:
  - goat:
      description: |
        goat-cli is a command line interface to query the
        Genomes on a Tree Open API.
      homepage: https://github.com/genomehubs/goat-cli
      documentation: https://github.com/genomehubs/goat-cli/wiki
      tool_dev_url: https://genomehubs.github.io/goat-cli/goat_cli/
      doi: "None"
      licence: ["MIT"]

# Exactly one of `taxon` / `taxa_file` is expected per input tuple; the
# process raises an error when both or neither are provided.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - taxon:
      type: string
      description: |
        The taxon to search. An NCBI taxon ID, or the name of a taxon at any rank.
  - taxa_file:
      type: file
      description: |
        A file of NCBI taxonomy IDs (tips) and/or binomial names. Each line
        should contain a single entry. File size is limited to 500 entries.
      pattern: "*.txt"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - taxonsearch:
      type: file
      description: TSV file containing search results.
      pattern: "*.tsv"

authors:
  - "@alxndrdiaz"

View file

@ -1015,6 +1015,10 @@ glnexus:
- modules/glnexus/**
- tests/modules/glnexus/**
goat/taxonsearch:
- modules/goat/taxonsearch/**
- tests/modules/goat/taxonsearch/**
graphmap2/align:
- modules/graphmap2/align/**
- tests/modules/graphmap2/align/**

View file

@ -0,0 +1,44 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GOAT_TAXONSEARCH } from '../../../../modules/goat/taxonsearch/main.nf'
//
// Test with genus name (Canis)
//
workflow test_goat_taxonsearch_genus_name {

    // Tuple layout expected by GOAT_TAXONSEARCH: [ meta, taxon, taxa_file ].
    // Values are passed positionally; no assignment is needed (or wanted)
    // inside the list literal.
    input = [
        [ id:'test_genus_name' ], // meta map
        'Canis',                  // taxon
        []                        // taxa_file (not used in this test)
    ]

    GOAT_TAXONSEARCH ( input )
}
//
// Test with genus (Drosophila, fruit flies) using NCBI taxonomy ID
//
workflow test_goat_taxonsearch_genus_id {

    // Tuple layout expected by GOAT_TAXONSEARCH: [ meta, taxon, taxa_file ].
    // Values are passed positionally; no assignment is needed (or wanted)
    // inside the list literal.
    input = [
        [ id:'test_genus_id' ], // meta map
        '7215',                 // taxon (kept as a string)
        []                      // taxa_file (not used in this test)
    ]

    GOAT_TAXONSEARCH ( input )
}
//
// Test with multiple species from a taxa file
//
workflow test_goat_taxonsearch_species {

    // Tuple layout expected by GOAT_TAXONSEARCH: [ meta, taxon, taxa_file ].
    // The taxon slot is an empty string so that only the taxa file is used.
    input = [
        [ id:'test_species' ], // meta map
        '',                    // taxon (empty: input comes from the file below)
        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/goat_taxonsearch/taxonomy_ids.txt', checkIfExists: true)
    ]

    GOAT_TAXONSEARCH ( input )
}

View file

@ -0,0 +1,8 @@
process {

    // Publish results under <outdir>/<tool>, where <tool> is derived from the
    // process name: take the last ':'-separated component, then the part
    // before the first '_', lowercased.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

    // Extra command-line flags passed to the module through task.ext.args.
    withName: GOAT_TAXONSEARCH {
        ext.args = '-l -b'
    }

}

View file

@ -0,0 +1,26 @@
# pytest-workflow test definitions for the goat/taxonsearch module: one entry
# per test workflow, each checking the published TSV.
#
# NOTE(review): the TSVs are produced by goat-cli queries against a remote
# API, so the md5sums below presumably change whenever the upstream data is
# updated. Confirm they are stable, or relax the checks if they drift.
- name: goat taxonsearch test_goat_taxonsearch_genus_name
  command: nextflow run ./tests/modules/goat/taxonsearch -entry test_goat_taxonsearch_genus_name -c ./tests/config/nextflow.config -c ./tests/modules/goat/taxonsearch/nextflow.config
  tags:
    - goat
    - goat/taxonsearch
  files:
    - path: output/goat/test_genus_name.tsv
      md5sum: e3de63a2bc2e16038aabeb325452bd16

- name: goat taxonsearch test_goat_taxonsearch_genus_id
  command: nextflow run ./tests/modules/goat/taxonsearch -entry test_goat_taxonsearch_genus_id -c ./tests/config/nextflow.config -c ./tests/modules/goat/taxonsearch/nextflow.config
  tags:
    - goat
    - goat/taxonsearch
  files:
    - path: output/goat/test_genus_id.tsv
      md5sum: 2cc41abbbc21b4b66f4d5126cfbba2bc

- name: goat taxonsearch test_goat_taxonsearch_species
  command: nextflow run ./tests/modules/goat/taxonsearch -entry test_goat_taxonsearch_species -c ./tests/config/nextflow.config -c ./tests/modules/goat/taxonsearch/nextflow.config
  tags:
    - goat
    - goat/taxonsearch
  files:
    - path: output/goat/test_species.tsv
      md5sum: 7f5b36414820bd3ce2bfda06d98e479c