From b5e3d16d80abfc6aa1c86ffde8c1b507bea80f0d Mon Sep 17 00:00:00 2001
From: Daniel Lundin
Date: Mon, 5 Sep 2022 14:01:33 +0200
Subject: [PATCH] Added hmmer/eslalimask (#1999)

* Created hmmer/eslalimask from template

* esl-alimask module with --rf-is-mask test case

* Add optional file output

* Add hmmer to test name

* Move from process_single to process_low

* Test for versions.yml, plus content

* Prettier

* Avoid gzipping input alignment

* meta.id to prefix

* Fix version string so it's from *this* tool
---
Review notes (this section is ignored by `git am`):

 - HMMER_ESLALIMASK wraps `esl-alimask`. Its first input is an 8-element
   tuple: meta, the alignment, and six flags (fmask_rf, fmask_all,
   gmask_rf, gmask_all, pmask_rf, pmask_all). Each truthy flag adds the
   matching `--<name> ${prefix}.<name>` option; every file matching
   `${prefix}.*mask*` is gzipped afterwards, which includes the masked
   alignment written with `-o`.
 - The test workflow now pads the hmmalign output with six explicit
   `false` flags so the tuple matches the declared input cardinality
   (assumes HMMER_HMMALIGN.out.sthlm emits [ meta, alignment ] -- confirm).
 - meta.yml: tool_dev_url and doi filled in with real values, the output
   `meta` example made consistent with the input one, and two flag
   descriptions fixed ("file gap-based" -> "file with gap-based").
 - All edits replace one line with one line, so hunk sizes and the
   diffstat are unchanged; the blob ids on the `index` lines are those of
   the original commit.

 modules/hmmer/eslalimask/main.nf              |  56 ++++++++++
 modules/hmmer/eslalimask/meta.yml             | 105 ++++++++++++++++++
 tests/config/pytest_modules.yml               |   4 +
 tests/modules/hmmer/eslalimask/main.nf        |  20 ++++
 .../modules/hmmer/eslalimask/nextflow.config  |   9 ++
 tests/modules/hmmer/eslalimask/test.yml       |  13 +++
 6 files changed, 207 insertions(+)
 create mode 100644 modules/hmmer/eslalimask/main.nf
 create mode 100644 modules/hmmer/eslalimask/meta.yml
 create mode 100644 tests/modules/hmmer/eslalimask/main.nf
 create mode 100644 tests/modules/hmmer/eslalimask/nextflow.config
 create mode 100644 tests/modules/hmmer/eslalimask/test.yml

diff --git a/modules/hmmer/eslalimask/main.nf b/modules/hmmer/eslalimask/main.nf
new file mode 100644
index 00000000..06213868
--- /dev/null
+++ b/modules/hmmer/eslalimask/main.nf
@@ -0,0 +1,56 @@
+process HMMER_ESLALIMASK {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::hmmer=3.3.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1':
+        'quay.io/biocontainers/hmmer:3.3.2--h1b792b2_1' }"
+
+    input:
+    tuple val(meta), path(unmaskedaln), val(fmask_rf), val(fmask_all), val(gmask_rf), val(gmask_all), val(pmask_rf), val(pmask_all)
+    path maskfile
+
+    output:
+    tuple val(meta), path("*.masked.sthlm.gz"), emit: maskedaln
+    path "*.fmask-rf.gz"                      , emit: fmask_rf , optional: true
+    path "*.fmask-all.gz"                     , emit: fmask_all, optional: true
+    path "*.gmask-rf.gz"                      , emit: gmask_rf , optional: true
+    path "*.gmask-all.gz"                     , emit: gmask_all, optional: true
+    path "*.pmask-rf.gz"                      , emit: pmask_rf , optional: true
+    path "*.pmask-all.gz"                     , emit: pmask_all, optional: true
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args         = task.ext.args ?: ''
+    def prefix       = task.ext.prefix ?: "${meta.id}"
+    def fmask_rfarg  = fmask_rf  ? "--fmask-rf ${prefix}.fmask-rf"   : ""
+    def fmask_allarg = fmask_all ? "--fmask-all ${prefix}.fmask-all" : ""
+    def gmask_rfarg  = gmask_rf  ? "--gmask-rf ${prefix}.gmask-rf"   : ""
+    def gmask_allarg = gmask_all ? "--gmask-all ${prefix}.gmask-all" : ""
+    def pmask_rfarg  = pmask_rf  ? "--pmask-rf ${prefix}.pmask-rf"   : ""
+    def pmask_allarg = pmask_all ? "--pmask-all ${prefix}.pmask-all" : ""
+    """
+    esl-alimask \\
+        $args \\
+        $fmask_rfarg \\
+        $fmask_allarg \\
+        $gmask_rfarg \\
+        $gmask_allarg \\
+        $pmask_rfarg \\
+        $pmask_allarg \\
+        -o ${prefix}.masked.sthlm \\
+        $unmaskedaln \\
+        $maskfile
+
+    gzip ${prefix}.*mask*
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hmmer/easel: \$(esl-reformat -h | grep -o '^# Easel [0-9.]*' | sed 's/^# Easel *//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/hmmer/eslalimask/meta.yml b/modules/hmmer/eslalimask/meta.yml
new file mode 100644
index 00000000..553f0574
--- /dev/null
+++ b/modules/hmmer/eslalimask/meta.yml
@@ -0,0 +1,105 @@
+name: "hmmer_eslalimask"
+description: Mask multiple sequence alignments
+keywords:
+  - hmmer
+  - alignment
+  - mask
+tools:
+  - "hmmer":
+      description: "Biosequence analysis using profile hidden Markov models"
+      homepage: http://hmmer.org/
+      documentation: http://hmmer.org/documentation.html
+      tool_dev_url: https://github.com/EddyRivasLab/hmmer
+      doi: "10.1371/journal.pcbi.1002195"
+      licence: ["BSD-3-Clause"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+
+  - unmaskedaln:
+      type: file
+      description: multiple sequence alignment, Stockholm or other formats
+      pattern: "*"
+
+  - fmask_rf:
+      type: val
+      description: Flag to output optional file with final mask of non-gap RF len
+
+  - fmask_all:
+      type: val
+      description: Flag to output optional file with final mask of full aln len
+
+  - gmask_rf:
+      type: val
+      description: Flag to output optional file with gap-based 0/1 mask of non-gap RF len
+
+  - gmask_all:
+      type: val
+      description: Flag to output optional file with gap-based 0/1 mask of full aln len
+
+  - pmask_rf:
+      type: val
+      description: Flag to output optional file with PP-based 0/1 mask of non-gap RF len
+
+  - pmask_all:
+      type: val
+      description: Flag to output optional file with PP-based 0/1 mask of full aln len
+
+  - maskfile:
+      type: file
+      description: mask file, see program documentation
+      pattern: "*"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+  - maskedaln:
+      type: file
+      description: Masked alignment in gzipped Stockholm format
+      pattern: "*.sthlm.gz"
+
+  - fmask_rf:
+      type: file
+      description: File with final mask of non-gap RF len
+      pattern: "*.fmask-rf.gz"
+
+  - fmask_all:
+      type: file
+      description: File with final mask of full aln len
+      pattern: "*.fmask-all.gz"
+
+  - gmask_rf:
+      type: file
+      description: File with gap-based 0/1 mask of non-gap RF len
+      pattern: "*.gmask-rf.gz"
+
+  - gmask_all:
+      type: file
+      description: File with gap-based 0/1 mask of full aln len
+      pattern: "*.gmask-all.gz"
+
+  - pmask_rf:
+      type: file
+      description: File with PP-based 0/1 mask of non-gap RF len
+      pattern: "*.pmask-rf.gz"
+
+  - pmask_all:
+      type: file
+      description: File with PP-based 0/1 mask of full aln len
+      pattern: "*.pmask-all.gz"
+
+authors:
+  - "@erikrikarddaniel"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index b8ca1f1e..520739a1 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1150,6 +1150,10 @@ hmmcopy/readcounter:
   - modules/hmmcopy/readcounter/**
   - tests/modules/hmmcopy/readcounter/**
 
+hmmer/eslalimask:
+  - modules/hmmer/eslalimask/**
+  - tests/modules/hmmer/eslalimask/**
+
 hmmer/hmmalign:
   - modules/hmmer/hmmalign/**
   - tests/modules/hmmer/hmmalign/**
diff --git a/tests/modules/hmmer/eslalimask/main.nf b/tests/modules/hmmer/eslalimask/main.nf
new file mode 100644
index 00000000..6a37b345
--- /dev/null
+++ b/tests/modules/hmmer/eslalimask/main.nf
@@ -0,0 +1,20 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HMMER_HMMALIGN } from '../../../../modules/hmmer/hmmalign/main.nf'
+include { HMMER_ESLALIMASK as RFMASK } from '../../../../modules/hmmer/eslalimask/main.nf'
+
+workflow test_hmmer_rfmask {
+
+    input = [
+        [ id:'test' ], // meta map
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz') // Change to params.test_data syntax after the data is included in tests/config/test_data.config
+    ]
+
+    hmm = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz')
+
+    HMMER_HMMALIGN ( input, hmm )
+
+    RFMASK ( HMMER_HMMALIGN.out.sthlm.map { meta, aln -> [ meta, aln, false, false, false, false, false, false ] }, [] )
+}
diff --git a/tests/modules/hmmer/eslalimask/nextflow.config b/tests/modules/hmmer/eslalimask/nextflow.config
new file mode 100644
index 00000000..399dfd49
--- /dev/null
+++ b/tests/modules/hmmer/eslalimask/nextflow.config
@@ -0,0 +1,9 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: RFMASK {
+        ext.args = "--rf-is-mask"
+    }
+
+}
diff --git a/tests/modules/hmmer/eslalimask/test.yml b/tests/modules/hmmer/eslalimask/test.yml
new file mode 100644
index 00000000..3bb67666
--- /dev/null
+++ b/tests/modules/hmmer/eslalimask/test.yml
@@ -0,0 +1,13 @@
+- name: hmmer eslalimask test_hmmer_rfmask
+  command: nextflow run ./tests/modules/hmmer/eslalimask -entry test_hmmer_rfmask -c ./tests/config/nextflow.config -c ./tests/modules/hmmer/eslalimask/nextflow.config
+  tags:
+    - hmmer
+    - hmmer/eslalimask
+  files:
+    - path: output/rfmask/test.masked.sthlm.gz
+      contains:
+        - "GS CP025268.1:223601-225154 DE Escherichia coli"
+        - "=GC RF"
+    - path: output/rfmask/versions.yml
+      contains:
+        - "easel:"