From 938387d10d5736fb6e51f54cd2c86349cedc74e2 Mon Sep 17 00:00:00 2001
From: "Moritz E. Beber"
Date: Wed, 23 Feb 2022 15:32:33 +0100
Subject: [PATCH] feat: add hmmsearch module and tests (#1273)

* feat: add hmmsearch module and tests

* chore: set medium resource requirements

* tests: look for correct output

* fix: add when condition

* Apply suggestions to meta.yml

Co-authored-by: James A. Fellows Yates

* refactor: create gzip compressed output

* docs: describe compressed in-/output

Co-authored-by: James A. Fellows Yates
---
Reviewer notes (free-text area between the three-dash marker and the first
file header; nothing here belongs to a hunk, so the hunk line counts and
the diffstat below are unaffected):

* modules/hmmer/hmmsearch/main.nf defines the HMMER_HMMSEARCH process. It
  runs `hmmsearch` with `--cpu $task.cpus`, always writes the main report
  to `${prefix}.txt`, and adds `-A`, `--tblout` and `--domtblout` outputs
  only when write_align / write_target / write_domain are truthy. The
  result files are then gzip-compressed and the HMMER version parsed from
  `hmmsearch -h` is written to versions.yml.
* `prefix` is `task.ext.prefix` when set, otherwise `meta.id`; extra
  command-line options come from `task.ext.args`.
* Only the `*.txt.gz` output is mandatory; the `.sto.gz`, `.tbl.gz` and
  `.domtbl.gz` outputs are declared `optional: true`.
* The two test entry points are test_hmmer_hmmsearch (all three flags
  false) and test_hmmer_hmmsearch_optional (all three flags true).
* NOTE(review): the gzip step selects files by glob (`*.txt`, `*.sto`,
  `*.tbl`, `*.domtbl`) rather than by the `${prefix}` names handed to
  hmmsearch; presumably no staged input ever matches those globs - confirm.
* NOTE(review): gzip runs with `--no-name` and test.yml pins an md5sum for
  test.sto.gz; presumably the flag is what keeps that checksum stable -
  confirm.
* NOTE(review): the write_target description in meta.yml contains a stray
  space before the period ("summary . Specify").
* NOTE(review): tests/modules/hmmer/hmmsearch/nextflow.config is added
  without a trailing newline (see the no-newline marker in its hunk).

 modules/hmmer/hmmsearch/main.nf               | 51 ++++++++++++++
 modules/hmmer/hmmsearch/meta.yml              | 69 +++++++++++++++++++
 tests/config/pytest_modules.yml               |  4 ++
 tests/modules/hmmer/hmmsearch/main.nf         | 33 +++++++++
 tests/modules/hmmer/hmmsearch/nextflow.config |  5 ++
 tests/modules/hmmer/hmmsearch/test.yml        | 31 +++++++++
 6 files changed, 193 insertions(+)
 create mode 100644 modules/hmmer/hmmsearch/main.nf
 create mode 100644 modules/hmmer/hmmsearch/meta.yml
 create mode 100644 tests/modules/hmmer/hmmsearch/main.nf
 create mode 100644 tests/modules/hmmer/hmmsearch/nextflow.config
 create mode 100644 tests/modules/hmmer/hmmsearch/test.yml

diff --git a/modules/hmmer/hmmsearch/main.nf b/modules/hmmer/hmmsearch/main.nf
new file mode 100644
index 00000000..3a2d15b7
--- /dev/null
+++ b/modules/hmmer/hmmsearch/main.nf
@@ -0,0 +1,51 @@
+process HMMER_HMMSEARCH {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::hmmer=3.3.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1' :
+        'quay.io/biocontainers/hmmer:3.3.2--h1b792b2_1' }"
+
+    input:
+    tuple val(meta), path(hmmfile), path(seqdb), val(write_align), val(write_target), val(write_domain)
+
+    output:
+    tuple val(meta), path('*.txt.gz')   , emit: output
+    tuple val(meta), path('*.sto.gz')   , emit: alignments    , optional: true
+    tuple val(meta), path('*.tbl.gz')   , emit: target_summary, optional: true
+    tuple val(meta), path('*.domtbl.gz'), emit: domain_summary, optional: true
+    path "versions.yml"                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    output = "${prefix}.txt"
+    alignment = write_align ? "-A ${prefix}.sto" : ''
+    target_summary = write_target ? "--tblout ${prefix}.tbl" : ''
+    domain_summary = write_domain ? "--domtblout ${prefix}.domtbl" : ''
+    """
+    hmmsearch \\
+        $args \\
+        --cpu $task.cpus \\
+        -o $output \\
+        $alignment \\
+        $target_summary \\
+        $domain_summary \\
+        $hmmfile \\
+        $seqdb
+
+    gzip --no-name *.txt \\
+        ${write_align ? '*.sto' : ''} \\
+        ${write_target ? '*.tbl' : ''} \\
+        ${write_domain ? '*.domtbl' : ''}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/hmmer/hmmsearch/meta.yml b/modules/hmmer/hmmsearch/meta.yml
new file mode 100644
index 00000000..b315d668
--- /dev/null
+++ b/modules/hmmer/hmmsearch/meta.yml
@@ -0,0 +1,69 @@
+name: hmmer_hmmsearch
+description: search profile(s) against a sequence database
+keywords:
+  - hidden Markov model
+  - HMM
+  - hmmer
+  - hmmsearch
+tools:
+  - hmmer:
+      description: Biosequence analysis using profile hidden Markov models
+      homepage: http://hmmer.org/
+      documentation: http://hmmer.org/documentation.html
+      tool_dev_url: https://github.com/EddyRivasLab/hmmer
+      doi: "10.1371/journal.pcbi.1002195"
+      licence: ['BSD']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - hmmfile:
+      type: file
+      description: One or more HMM profiles created with hmmbuild
+      pattern: "*.{hmm,hmm.gz}"
+  - seqdb:
+      type: file
+      description: Database of sequences in FASTA format
+      pattern: "*.{fasta,fna,faa,fa,fasta.gz,fna.gz,faa.gz,fa.gz}"
+  - write_align:
+      type: val
+      description: Flag to write optional alignment output. Specify with 'true' to output
+  - write_target:
+      type: val
+      description: Flag to write optional per target summary . Specify with 'true' to output
+  - write_domain:
+      type: val
+      description: Flag to write optional per domain summary. Specify with 'true' to output
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - output:
+      type: file
+      description: Human readable output summarizing hmmsearch results
+      pattern: "*.{txt.gz}"
+  - alignments:
+      type: file
+      description: Optional multiple sequence alignment (MSA) in Stockholm format
+      pattern: "*.{sto.gz}"
+  - target_summary:
+      type: file
+      description: Optional tabular (space-delimited) summary of per-target output
+      pattern: "*.{tbl.gz}"
+  - domain_summary:
+      type: file
+      description: Optional tabular (space-delimited) summary of per-domain output
+      pattern: "*.{domtbl.gz}"
+
+authors:
+  - "@Midnighter"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 93903a65..ddf496b0 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -775,6 +775,10 @@ hmmer/hmmalign:
   - modules/hmmer/hmmalign/**
   - tests/modules/hmmer/hmmalign/**
 
+hmmer/hmmsearch:
+  - modules/hmmer/hmmsearch/**
+  - tests/modules/hmmer/hmmsearch/**
+
 homer/annotatepeaks:
   - modules/homer/annotatepeaks/**
   - tests/modules/homer/annotatepeaks/**
diff --git a/tests/modules/hmmer/hmmsearch/main.nf b/tests/modules/hmmer/hmmsearch/main.nf
new file mode 100644
index 00000000..b181a529
--- /dev/null
+++ b/tests/modules/hmmer/hmmsearch/main.nf
@@ -0,0 +1,33 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HMMER_HMMSEARCH } from '../../../../modules/hmmer/hmmsearch/main.nf'
+
+workflow test_hmmer_hmmsearch {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true),
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true),
+        false,
+        false,
+        false
+    ]
+
+    HMMER_HMMSEARCH ( input )
+}
+
+workflow test_hmmer_hmmsearch_optional {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true),
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true),
+        true,
+        true,
+        true
+    ]
+
+    HMMER_HMMSEARCH ( input )
+}
diff --git a/tests/modules/hmmer/hmmsearch/nextflow.config b/tests/modules/hmmer/hmmsearch/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/hmmer/hmmsearch/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/hmmer/hmmsearch/test.yml b/tests/modules/hmmer/hmmsearch/test.yml
new file mode 100644
index 00000000..91e4775c
--- /dev/null
+++ b/tests/modules/hmmer/hmmsearch/test.yml
@@ -0,0 +1,31 @@
+- name: hmmer hmmsearch test_hmmer_hmmsearch
+  command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch -c tests/config/nextflow.config
+  tags:
+    - hmmer/hmmsearch
+    - hmmer
+  files:
+    - path: output/hmmer/test.txt.gz
+      contains:
+        - '[ok]'
+    - path: output/hmmer/versions.yml
+      md5sum: ed0808c10abd205c6bd0fb01f45259bb
+
+- name: hmmer hmmsearch test_hmmer_hmmsearch_optional
+  command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch_optional -c tests/config/nextflow.config
+  tags:
+    - hmmer/hmmsearch
+    - hmmer
+  files:
+    - path: output/hmmer/test.sto.gz
+      md5sum: d3121aa33455074c566fb7f8fdcda7b0
+    - path: output/hmmer/test.domtbl.gz
+      contains:
+        - '# [ok]'
+    - path: output/hmmer/test.tbl.gz
+      contains:
+        - '# [ok]'
+    - path: output/hmmer/test.txt.gz
+      contains:
+        - '[ok]'
+    - path: output/hmmer/versions.yml
+      md5sum: ebdcb08ae540e840f7b5c4c75a3a2993