feat: add hmmsearch module and tests (#1273)

* feat: add hmmsearch module and tests

* chore: set medium resource requirements

* tests: look for correct output

* fix: add when condition

* Apply suggestions to meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* refactor: create gzip compressed output

* docs: describe compressed in-/output

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Moritz E. Beber 2022-02-23 15:32:33 +01:00 committed by GitHub
parent f9d25aad7f
commit 938387d10d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 193 additions and 0 deletions

View file

@ -0,0 +1,51 @@
// Search profile HMM(s) against a sequence database with `hmmsearch` and gzip
// every result file the task was asked to write.
//
// Input tuple:
//   meta         - sample map; `meta.id` is the task tag and the default output prefix
//   hmmfile      - profile HMM file handed to hmmsearch as the query
//   seqdb        - sequence database handed to hmmsearch as the target
//   write_align  - when truthy, additionally write the hit alignment (`-A`)
//   write_target - when truthy, additionally write the per-target table (`--tblout`)
//   write_domain - when truthy, additionally write the per-domain table (`--domtblout`)
process HMMER_HMMSEARCH {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::hmmer=3.3.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1' :
        'quay.io/biocontainers/hmmer:3.3.2--h1b792b2_1' }"

    input:
    tuple val(meta), path(hmmfile), path(seqdb), val(write_align), val(write_target), val(write_domain)

    output:
    tuple val(meta), path('*.txt.gz')   , emit: output
    tuple val(meta), path('*.sto.gz')   , emit: alignments    , optional: true
    tuple val(meta), path('*.tbl.gz')   , emit: target_summary, optional: true
    tuple val(meta), path('*.domtbl.gz'), emit: domain_summary, optional: true
    path "versions.yml"                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Declared with `def` so each value stays local to this task's script
    // closure instead of being assigned to a variable shared outside it.
    def output         = "${prefix}.txt"
    def alignment      = write_align  ? "-A ${prefix}.sto"             : ''
    def target_summary = write_target ? "--tblout ${prefix}.tbl"       : ''
    def domain_summary = write_domain ? "--domtblout ${prefix}.domtbl" : ''

    // Compress exactly the files written by this task. Naming them explicitly
    // (rather than globbing *.txt, *.sto, ...) keeps gzip away from any staged
    // input that happens to carry one of those extensions.
    def to_compress = [ output ]
    if (write_align)  { to_compress << "${prefix}.sto" }
    if (write_target) { to_compress << "${prefix}.tbl" }
    if (write_domain) { to_compress << "${prefix}.domtbl" }
    """
    hmmsearch \\
        $args \\
        --cpu $task.cpus \\
        -o $output \\
        $alignment \\
        $target_summary \\
        $domain_summary \\
        $hmmfile \\
        $seqdb

    # --no-name leaves the original file name and timestamp out of the gzip header
    gzip --no-name ${to_compress.join(' ')}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,69 @@
# Module metadata: identity, search keywords and the wrapped software.
name: hmmer_hmmsearch
description: search profile(s) against a sequence database
keywords:
  - hidden Markov model
  - HMM
  - hmmer
  - hmmsearch
# Software executed by this module.
tools:
  - hmmer:
      description: Biosequence analysis using profile hidden Markov models
      homepage: http://hmmer.org/
      documentation: http://hmmer.org/documentation.html
      tool_dev_url: https://github.com/EddyRivasLab/hmmer
      doi: '10.1371/journal.pcbi.1002195'
      licence:
        - BSD
# Inputs of the module: sample metadata, the two files to search with, and
# three switches that each enable one optional result file.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - hmmfile:
      type: file
      description: One or more HMM profiles created with hmmbuild
      pattern: "*.{hmm,hmm.gz}"
  - seqdb:
      type: file
      description: Database of sequences in FASTA format
      pattern: "*.{fasta,fna,faa,fa,fasta.gz,fna.gz,faa.gz,fa.gz}"
  - write_align:
      type: val
      description: Flag to write optional alignment output. Specify with 'true' to output
  - write_target:
      type: val
      description: Flag to write optional per target summary. Specify with 'true' to output
  - write_domain:
      type: val
      description: Flag to write optional per domain summary. Specify with 'true' to output
# Outputs of the module. Every result file is gzip-compressed; the three
# entries described as "Optional" exist only when requested.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: 'versions.yml'
  - output:
      type: file
      description: Human readable output summarizing hmmsearch results
      pattern: '*.{txt.gz}'
  - alignments:
      type: file
      description: Optional multiple sequence alignment (MSA) in Stockholm format
      pattern: '*.{sto.gz}'
  - target_summary:
      type: file
      description: Optional tabular (space-delimited) summary of per-target output
      pattern: '*.{tbl.gz}'
  - domain_summary:
      type: file
      description: Optional tabular (space-delimited) summary of per-domain output
      pattern: '*.{domtbl.gz}'
# Module maintainers.
authors:
  - '@Midnighter'

View file

@ -775,6 +775,10 @@ hmmer/hmmalign:
- modules/hmmer/hmmalign/**
- tests/modules/hmmer/hmmalign/**
hmmer/hmmsearch:
  - modules/hmmer/hmmsearch/**
  - tests/modules/hmmer/hmmsearch/**
homer/annotatepeaks:
- modules/homer/annotatepeaks/**
- tests/modules/homer/annotatepeaks/**

View file

@ -0,0 +1,33 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { HMMER_HMMSEARCH } from '../../../../modules/hmmer/hmmsearch/main.nf'
// Runs HMMER_HMMSEARCH with all three optional outputs switched off.
workflow test_hmmer_hmmsearch {

    HMMER_HMMSEARCH (
        [
            [ id:'test', single_end:false ], // meta map
            file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true),   // hmmfile
            file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), // seqdb
            false, // write_align
            false, // write_target
            false  // write_domain
        ]
    )
}
// Runs HMMER_HMMSEARCH with all three optional outputs switched on.
workflow test_hmmer_hmmsearch_optional {

    HMMER_HMMSEARCH (
        [
            [ id:'test', single_end:false ], // meta map
            file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true),   // hmmfile
            file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), // seqdb
            true, // write_align
            true, // write_target
            true  // write_domain
        ]
    )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish into <outdir>/<name>, where <name> is the lower-cased text before
    // the first '_' in the last ':'-separated segment of the process name.
    publishDir = {
        def lastSegment = task.process.tokenize(':')[-1]
        def leadingWord = lastSegment.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${leadingWord}"
    }

}

View file

@ -0,0 +1,31 @@
# Default run: checks the main report and versions.yml.
- name: hmmer hmmsearch test_hmmer_hmmsearch
  command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch -c tests/config/nextflow.config
  tags:
    - hmmer/hmmsearch
    - hmmer
  files:
    - path: output/hmmer/test.txt.gz
      contains:
        - "[ok]"
    - path: output/hmmer/versions.yml
      md5sum: ed0808c10abd205c6bd0fb01f45259bb
# Run with every optional output enabled: checks the alignment, both tables,
# the main report and versions.yml.
- name: hmmer hmmsearch test_hmmer_hmmsearch_optional
  command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch_optional -c tests/config/nextflow.config
  tags:
    - hmmer/hmmsearch
    - hmmer
  files:
    - path: output/hmmer/test.sto.gz
      md5sum: d3121aa33455074c566fb7f8fdcda7b0
    - path: output/hmmer/test.domtbl.gz
      contains:
        - "# [ok]"
    - path: output/hmmer/test.tbl.gz
      contains:
        - "# [ok]"
    - path: output/hmmer/test.txt.gz
      contains:
        - "[ok]"
    - path: output/hmmer/versions.yml
      md5sum: ebdcb08ae540e840f7b5c4c75a3a2993