Added hmmer/eslalimask (#1999)

* Created hmmer/eslalimask from template

* esl-alimask module with --rf-is-mask test case

* Add optional file output

* Add hmmer to test name

* Move from process_single to process_low

* Test for versions.yml, plus content

* Prettier

* Avoid gzipping input alignment

* meta.id to prefix

* Fix version string so it's from *this* tool
This commit is contained in:
Daniel Lundin 2022-09-05 14:01:33 +02:00 committed by GitHub
parent e7845e2472
commit b5e3d16d80
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 207 additions and 0 deletions

View file

@ -0,0 +1,56 @@
// Masks a multiple sequence alignment with esl-alimask, run from the hmmer
// conda package / biocontainer pinned below.
//
// Inputs:
//   - tuple: meta map, the unmasked alignment, and six flags. Each truthy flag
//     asks esl-alimask to also write the corresponding mask file
//     (--fmask-rf, --fmask-all, --gmask-rf, --gmask-all, --pmask-rf, --pmask-all).
//   - maskfile: passed as the last positional argument to esl-alimask.
//
// Outputs:
//   - maskedaln: gzipped masked alignment, paired with the meta map.
//   - fmask_rf .. pmask_all: gzipped mask files, only present when requested.
//   - versions: versions.yml with the Easel version nested under the process name.
process HMMER_ESLALIMASK {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::hmmer=3.3.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1':
        'quay.io/biocontainers/hmmer:3.3.2--h1b792b2_1' }"

    input:
    tuple val(meta), path(unmaskedaln), val(fmask_rf), val(fmask_all), val(gmask_rf), val(gmask_all), val(pmask_rf), val(pmask_all)
    path  maskfile

    output:
    tuple val(meta), path("*.masked.sthlm.gz"), emit: maskedaln
    path "*.fmask-rf.gz"                      , emit: fmask_rf , optional: true
    path "*.fmask-all.gz"                     , emit: fmask_all, optional: true
    path "*.gmask-rf.gz"                      , emit: gmask_rf , optional: true
    path "*.gmask-all.gz"                     , emit: gmask_all, optional: true
    path "*.pmask-rf.gz"                      , emit: pmask_rf , optional: true
    path "*.pmask-all.gz"                     , emit: pmask_all, optional: true
    path "versions.yml"                       , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args         = task.ext.args ?: ''
    def prefix       = task.ext.prefix ?: "${meta.id}"
    // Each optional mask file is only requested when its flag is truthy;
    // otherwise the argument collapses to an empty string.
    def fmask_rfarg  = fmask_rf  ? "--fmask-rf ${prefix}.fmask-rf"   : ""
    def fmask_allarg = fmask_all ? "--fmask-all ${prefix}.fmask-all" : ""
    def gmask_rfarg  = gmask_rf  ? "--gmask-rf ${prefix}.gmask-rf"   : ""
    def gmask_allarg = gmask_all ? "--gmask-all ${prefix}.gmask-all" : ""
    def pmask_rfarg  = pmask_rf  ? "--pmask-rf ${prefix}.pmask-rf"   : ""
    def pmask_allarg = pmask_all ? "--pmask-all ${prefix}.pmask-all" : ""
    // The gzip glob ${prefix}.*mask* covers the masked alignment written by -o
    // as well as any requested mask files.
    // The version is parsed from the "# Easel <version>" banner of `esl-reformat -h`
    // and must stay indented under the process-name key in versions.yml.
    """
    esl-alimask \\
        $args \\
        $fmask_rfarg \\
        $fmask_allarg \\
        $gmask_rfarg \\
        $gmask_allarg \\
        $pmask_rfarg \\
        $pmask_allarg \\
        -o ${prefix}.masked.sthlm \\
        $unmaskedaln \\
        $maskfile

    gzip ${prefix}.*mask*

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        hmmer/easel: \$(esl-reformat -h | grep -o '^# Easel [0-9.]*' | sed 's/^# Easel *//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,105 @@
# Module metadata for the HMMER_ESLALIMASK process.
name: "hmmer_eslalimask"
description: Mask multiple sequence alignments
keywords:
  - hmmer
  - alignment
  - mask
tools:
  - "hmmer":
      description: "Biosequence analysis using profile hidden Markov models"
      homepage: http://hmmer.org/
      documentation: http://hmmer.org/documentation.html
      tool_dev_url: https://github.com/EddyRivasLab/hmmer
      doi: "10.1371/journal.pcbi.1002195"
      licence: ["BSD-3-Clause"]

# The first eight entries are the elements of the process's input tuple, in order;
# maskfile is a separate path input.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - unmaskedaln:
      type: file
      description: multiple sequence alignment, Stockholm or other formats
      pattern: "*"
  - fmask_rf:
      type: val
      description: Flag to output optional file with final mask of non-gap RF len
  - fmask_all:
      type: val
      description: Flag to output optional file with final mask of full aln len
  - gmask_rf:
      type: val
      description: Flag to output optional file with gap-based 0/1 mask of non-gap RF len
  - gmask_all:
      type: val
      description: Flag to output optional file with gap-based 0/1 mask of full aln len
  - pmask_rf:
      type: val
      description: Flag to output optional file with PP-based 0/1 mask of non-gap RF len
  - pmask_all:
      type: val
      description: Flag to output optional file with PP-based 0/1 mask of full aln len
  - maskfile:
      type: file
      description: mask file, see program documentation
      pattern: "*"

# Mask files are optional outputs; each is only produced when its input flag is set.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - maskedaln:
      type: file
      description: Masked alignment in gzipped Stockholm format
      pattern: "*.masked.sthlm.gz"
  - fmask_rf:
      type: file
      description: File with final mask of non-gap RF len
      pattern: "*.fmask-rf.gz"
  - fmask_all:
      type: file
      description: File with final mask of full aln len
      pattern: "*.fmask-all.gz"
  - gmask_rf:
      type: file
      description: File with gap-based 0/1 mask of non-gap RF len
      pattern: "*.gmask-rf.gz"
  - gmask_all:
      type: file
      description: File with gap-based 0/1 mask of full aln len
      pattern: "*.gmask-all.gz"
  - pmask_rf:
      type: file
      description: File with PP-based 0/1 mask of non-gap RF len
      pattern: "*.pmask-rf.gz"
  - pmask_all:
      type: file
      description: File with PP-based 0/1 mask of full aln len
      pattern: "*.pmask-all.gz"

authors:
  - "@erikrikarddaniel"

View file

@ -1150,6 +1150,10 @@ hmmcopy/readcounter:
- modules/hmmcopy/readcounter/**
- tests/modules/hmmcopy/readcounter/**
# Path globs for the hmmer/eslalimask module: its sources and its tests.
hmmer/eslalimask:
- modules/hmmer/eslalimask/**
- tests/modules/hmmer/eslalimask/**
# Path globs for the hmmer/hmmalign module: its sources and its tests.
hmmer/hmmalign:
- modules/hmmer/hmmalign/**
- tests/modules/hmmer/hmmalign/**

View file

@ -0,0 +1,20 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { HMMER_HMMALIGN } from '../../../../modules/hmmer/hmmalign/main.nf'
include { HMMER_ESLALIMASK as RFMASK } from '../../../../modules/hmmer/eslalimask/main.nf'
// Aligns the test 16S sequences to the bacterial 16S rRNA profile with
// HMMER_HMMALIGN, then masks the resulting alignment with HMMER_ESLALIMASK
// (included under the alias RFMASK).
workflow test_hmmer_rfmask {

    input = [
        [ id:'test' ], // meta map
        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz') // Change to params.test_data syntax after the data is included in tests/config/test_data.config
    ]

    hmm = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz')

    HMMER_HMMALIGN ( input, hmm )

    // HMMER_ESLALIMASK declares an 8-element input tuple: meta, alignment and
    // six mask-output flags. Pad the alignment channel with six false flags so
    // the tuple matches that declaration and no optional mask file is requested.
    // NOTE(review): assumes HMMER_HMMALIGN.out.sthlm emits [ meta, alignment ] - confirm.
    unmasked = HMMER_HMMALIGN.out.sthlm
        .map { meta, aln -> [ meta, aln, false, false, false, false, false, false ] }

    // No mask file is supplied; the mask comes from the options set for RFMASK
    // in this test's nextflow.config.
    RFMASK ( unmasked, [] )
}

View file

@ -0,0 +1,9 @@
process {

    // Publish results under <outdir>/<dir>, where <dir> is the lower-cased text
    // before the first underscore of the last ':'-separated part of the process name.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def dirName    = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${dirName}"
    }

    // Extra command-line options for the process selected by the name RFMASK.
    withName: 'RFMASK' {
        ext.args = '--rf-is-mask'
    }

}

View file

@ -0,0 +1,13 @@
# Runs the test_hmmer_rfmask workflow and checks the masked alignment and the
# versions file it publishes under output/rfmask.
- name: hmmer eslalimask test_hmmer_rfmask
  command: nextflow run ./tests/modules/hmmer/eslalimask -entry test_hmmer_rfmask -c ./tests/config/nextflow.config -c ./tests/modules/hmmer/eslalimask/nextflow.config
  tags:
    - hmmer
    - hmmer/eslalimask
  files:
    - path: output/rfmask/test.masked.sthlm.gz
      contains:
        - "GS CP025268.1:223601-225154 DE Escherichia coli"
        - "=GC RF"
    - path: output/rfmask/versions.yml
      contains:
        - "easel:"