new (fixed) module: entrezdirect/esearch (#1927)

* added module files

* latest version

* latest version

* latest version

* latest version

* updated test, failing

* added comment on why to use tail -n+3

* same as latest test

* moved args to the end of script

* updated test

* double quotes for esearch input

* removed tail -n+3, stderr is redirected by nextflow

* changed single to double quotes

* Update modules/entrezdirect/esearch/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/entrezdirect/esearch/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update meta.yml

removed default from database parameter

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Alexander Ramos Díaz 2022-07-26 09:07:38 -06:00 committed by GitHub
parent ad426cb18c
commit 4ed5dc4593
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 195 additions and 0 deletions

View file

@ -0,0 +1,35 @@
// Runs Entrez Direct `esearch` for a single query term against a single
// database and redirects the command's standard output to `<prefix>.xml`.
//
// Inputs:
//   meta     - Groovy map with sample information; `meta.id` is used as the
//              task tag and as the default output prefix
//   term     - query text handed to `esearch -query`
//   database - database name handed to `esearch -db`
//
// Outputs:
//   xml      - tuple of meta and the `*.xml` file written by the command
//   versions - versions.yml holding the output of `esearch -version`
process ENTREZDIRECT_ESEARCH {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::entrez-direct=16.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/entrez-direct:16.2--he881be0_1':
        'quay.io/biocontainers/entrez-direct:16.2--he881be0_1' }"

    input:
    tuple val(meta), val(term)
    val database

    output:
    tuple val(meta), path("*.xml") , emit: xml
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def args = task.ext.args ?: ''
    // Shell-quote the query so that it always reaches esearch as ONE argument.
    // Interpolated bare, a term containing spaces is word-split by the shell and
    // characters such as `*`, `&`, `;` or `$` are interpreted instead of searched.
    // Embedded single quotes are escaped with the usual '\'' sequence; every other
    // character (double quotes included) is passed to esearch literally.
    def query = term.toString().replace("'", "'\\''")
    """
    esearch \\
        -db $database \\
        -query '${query}' \\
        $args > ${prefix}.xml

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        esearch: \$(esearch -version)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,55 @@
# Module metadata for ENTREZDIRECT_ESEARCH.
# Inputs are listed in the order the process declares them (meta, term, database)
# and output keys use the channel names the process emits (xml, versions).
name: "entrezdirect_esearch"
description: Searches a term in a public NCBI database
keywords:
  - public datasets
  - entrez
  - search
  - ncbi
  - database
tools:
  - entrezdirect:
      description: |
        Entrez Direct (EDirect) is a method for accessing the NCBI's set of
        interconnected databases (publication, sequence, structure, gene,
        variation, expression, etc.) from a UNIX terminal window. Functions
        take search terms from command line arguments. Individual operations
        are combined to build multi-step queries. Record retrieval and
        formatting normally complete the process.
      homepage: https://www.ncbi.nlm.nih.gov/books/NBK179288/
      documentation: https://www.ncbi.nlm.nih.gov/books/NBK25501/
      tool_dev_url: https://www.ncbi.nlm.nih.gov/books/NBK25498/
      doi: 10.1016/S0076-6879(96)66012-1
      licence: ["PUBLIC DOMAIN"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - term:
      type: value
      description: |
        Entrez text query. All special characters must be URL encoded.
        Spaces may be replaced by '+' signs.
  - database:
      type: value
      description: Value must be a valid Entrez database name.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - xml:
      type: file
      description: Query result text in XML format
      pattern: "*.xml"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@alxndrdiaz"

View file

@ -691,6 +691,10 @@ ensemblvep:
- modules/ensemblvep/**
- tests/modules/ensemblvep/**
# Path globs associated with the entrezdirect/esearch tag: the module sources
# and its tests. NOTE(review): presumably consumed by CI to decide which module
# tests to run when these paths change; confirm against the workflow that
# reads this file.
entrezdirect/esearch:
- modules/entrezdirect/esearch/**
- tests/modules/entrezdirect/esearch/**
entrezdirect/esummary:
- modules/entrezdirect/esummary/**
- tests/modules/entrezdirect/esummary/**

View file

@ -0,0 +1,46 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { ENTREZDIRECT_ESEARCH as ENTREZDIRECT_ESEARCHP } from "../../../../modules/entrezdirect/esearch/main.nf"
include { ENTREZDIRECT_ESEARCH } from "../../../../modules/entrezdirect/esearch/main.nf"
//
// Test with the PubMed database. This entry calls the ENTREZDIRECT_ESEARCHP
// alias so that the extra esearch options (sort order, date range, spell check)
// configured via ext.args in nextflow.config apply to this test only.
//
workflow test_entrezdirect_esearch_pubmed {

    input = [
        [ id:'test_pubmed' ],                       // meta map
        "selective serotonin reuptake inhibitor"    // search term
    ]
    database = "pubmed"

    ENTREZDIRECT_ESEARCHP ( input, database )
}
//
// Test with the Genome database and a species name as the search term;
// no date range, no spell check (no ext.args are configured for this process name)
//
workflow test_entrezdirect_esearch_genome {

    input = [
        [ id:'test_genome' ],   // meta map
        "Danio+rerio"           // search term
    ]
    database = "genome"

    ENTREZDIRECT_ESEARCH ( input, database )
}
//
// Test with the Assembly database and a GenBank accession as the search term;
// no date range, no spell check (no ext.args are configured for this process name)
//
workflow test_entrezdirect_esearch_assembly {

    input = [
        [ id:'test_assembly' ], // meta map
        "GCA_000001635.9"       // search term
    ]
    database = "assembly"

    ENTREZDIRECT_ESEARCH ( input, database )
}

View file

@ -0,0 +1,8 @@
process {

    // Publish each task's outputs under <outdir>/<tool>, where <tool> is the
    // lower-cased text before the first underscore of the simple process name
    // (the last ':'-separated component of task.process).
    publishDir = {
        def simpleName = task.process.tokenize(":")[-1]
        def toolDir    = simpleName.tokenize("_")[0].toLowerCase()
        "${params.outdir}/${toolDir}"
    }

    // Extra esearch options applied only to the ENTREZDIRECT_ESEARCHP alias
    withName: ENTREZDIRECT_ESEARCHP {
        ext.args = '-sort pub+date -datetype pdat -mindate 2021/06/20 -maxdate 2022/06/20 -spell'
    }

}

View file

@ -0,0 +1,47 @@
# Runs the test_entrezdirect_esearch_pubmed entry workflow and checks that
# output/entrezdirect/test_pubmed.xml contains each of the listed strings.
- name: entrezdirect esearch test_entrezdirect_esearch_pubmed
command: nextflow run ./tests/modules/entrezdirect/esearch -entry test_entrezdirect_esearch_pubmed -c ./tests/config/nextflow.config -c ./tests/modules/entrezdirect/esearch/nextflow.config
tags:
- entrezdirect/esearch
- entrezdirect
files:
- path: output/entrezdirect/test_pubmed.xml
contains:
- "<ENTREZ_DIRECT>"
- "<Db>pubmed</Db>"
- "<WebEnv>"
- "<QueryKey>"
- "<Step>"
- "<Count>"
- "</ENTREZ_DIRECT>"
# Runs the test_entrezdirect_esearch_genome entry workflow and checks that
# output/entrezdirect/test_genome.xml contains each of the listed strings.
- name: entrezdirect esearch test_entrezdirect_esearch_genome
command: nextflow run ./tests/modules/entrezdirect/esearch -entry test_entrezdirect_esearch_genome -c ./tests/config/nextflow.config -c ./tests/modules/entrezdirect/esearch/nextflow.config
tags:
- entrezdirect/esearch
- entrezdirect
files:
- path: output/entrezdirect/test_genome.xml
contains:
- "<ENTREZ_DIRECT>"
- "<Db>genome</Db>"
- "<WebEnv>"
- "<QueryKey>"
- "<Count>"
- "<Step>"
- "</ENTREZ_DIRECT>"
# Runs the test_entrezdirect_esearch_assembly entry workflow and checks that
# output/entrezdirect/test_assembly.xml contains each of the listed strings.
- name: entrezdirect esearch test_entrezdirect_esearch_assembly
command: nextflow run ./tests/modules/entrezdirect/esearch -entry test_entrezdirect_esearch_assembly -c ./tests/config/nextflow.config -c ./tests/modules/entrezdirect/esearch/nextflow.config
tags:
- entrezdirect/esearch
- entrezdirect
files:
- path: output/entrezdirect/test_assembly.xml
contains:
- "<ENTREZ_DIRECT>"
- "<Db>assembly</Db>"
- "<WebEnv>"
- "<QueryKey>"
- "<Count>"
- "<Step>"
- "</ENTREZ_DIRECT>"