eslreformat: descriptive suffix (#2081)

This commit is contained in:
Daniel Lundin 2022-09-19 09:49:00 +02:00 committed by GitHub
parent f62e4cf5f0
commit b6f18f8733
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 38 additions and 6 deletions

View file

@ -11,8 +11,8 @@ process HMMER_ESLREFORMAT {
tuple val(meta), path(seqfile) tuple val(meta), path(seqfile)
output: output:
tuple val(meta), path("*.sequences.gz"), emit: seqreformated tuple val(meta), path("*.*.gz"), emit: seqreformated
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
@ -20,6 +20,7 @@ process HMMER_ESLREFORMAT {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def suffix = args ? args.trim().tokenize(" ")[-1] : "sequences"
// Use for any postprocessing of the sequence file, e.g. removal of gap characters // Use for any postprocessing of the sequence file, e.g. removal of gap characters
def postproc = task.ext.postprocessing ?: "" def postproc = task.ext.postprocessing ?: ""
""" """
@ -27,7 +28,7 @@ process HMMER_ESLREFORMAT {
$args \\ $args \\
$seqfile \\ $seqfile \\
$postproc \\ $postproc \\
| gzip -c > ${prefix}.sequences.gz | gzip -c > ${prefix}.${suffix}.gz
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":

View file

@ -35,7 +35,7 @@ output:
- seqreformated: - seqreformated:
type: file type: file
description: Reformated sequence file description: Reformated sequence file
pattern: "*.sequences.gz" pattern: "*.*.gz"
authors: authors:
- "@erikrikarddaniel" - "@erikrikarddaniel"

View file

@ -4,6 +4,7 @@ nextflow.enable.dsl = 2
include { HMMER_HMMALIGN } from '../../../../modules/hmmer/hmmalign/main.nf' include { HMMER_HMMALIGN } from '../../../../modules/hmmer/hmmalign/main.nf'
include { HMMER_ESLREFORMAT as HMMER_ESLREFORMAT_AFA } from '../../../../modules/hmmer/eslreformat/main.nf' include { HMMER_ESLREFORMAT as HMMER_ESLREFORMAT_AFA } from '../../../../modules/hmmer/eslreformat/main.nf'
include { HMMER_ESLREFORMAT as HMMER_ESLREFORMAT_PHYLIP } from '../../../../modules/hmmer/eslreformat/main.nf'
include { HMMER_ESLREFORMAT as HMMER_ESLREFORMAT_UNALIGN } from '../../../../modules/hmmer/eslreformat/main.nf' include { HMMER_ESLREFORMAT as HMMER_ESLREFORMAT_UNALIGN } from '../../../../modules/hmmer/eslreformat/main.nf'
workflow test_hmmer_eslreformat_afa { workflow test_hmmer_eslreformat_afa {
@ -20,6 +21,20 @@ workflow test_hmmer_eslreformat_afa {
HMMER_ESLREFORMAT_AFA ( HMMER_HMMALIGN.out.sthlm ) HMMER_ESLREFORMAT_AFA ( HMMER_HMMALIGN.out.sthlm )
} }
// Test entry point for the PHYLIP-aliased eslreformat process.
// Runs HMMER_HMMALIGN on the e_coli_k12_16s.fna.gz test sequences with the
// bac.16S_rRNA.hmm.gz profile, then passes the `sthlm` output of HMMER_HMMALIGN
// to HMMER_ESLREFORMAT_PHYLIP. The arguments for that alias are set in the
// accompanying test nextflow.config via a withName selector.
workflow test_hmmer_eslreformat_phylip {
input = [
[ id:'test' ], // meta map
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz') // Change to params.test_data syntax after the data is included in tests/config/test_data.config
]
// Profile file passed as the second input to HMMER_HMMALIGN.
hmm = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz')
HMMER_HMMALIGN ( input, hmm )
// Reformat the hmmalign `sthlm` output with the PHYLIP alias.
HMMER_ESLREFORMAT_PHYLIP ( HMMER_HMMALIGN.out.sthlm )
}
workflow test_hmmer_eslreformat_unalign { workflow test_hmmer_eslreformat_unalign {
input = [ input = [

View file

@ -6,6 +6,10 @@ process {
ext.args = 'afa' ext.args = 'afa'
} }
// Arguments for the PHYLIP alias of HMMER_ESLREFORMAT (used by test_hmmer_eslreformat_phylip).
withName: HMMER_ESLREFORMAT_PHYLIP {
// The module uses the last whitespace-separated token of ext.args (after trim)
// as the output file suffix, so the format name must stay last: this yields
// <prefix>.phylip.gz. The surrounding spaces exercise the trim() in the module.
// NOTE(review): -u presumably upper-cases residues (the test expects an
// upper-case sequence row) — confirm against the esl-reformat documentation.
ext.args = ' -u phylip '
}
withName: HMMER_ESLREFORMAT_UNALIGN { withName: HMMER_ESLREFORMAT_UNALIGN {
ext.args = '--gapsym=- afa' ext.args = '--gapsym=- afa'
ext.postprocessing = '| sed "/^>/!s/-//g"' ext.postprocessing = '| sed "/^>/!s/-//g"'

View file

@ -4,20 +4,32 @@
- hmmer/eslreformat - hmmer/eslreformat
- hmmer - hmmer
files: files:
- path: output/hmmer/test.sequences.gz - path: output/hmmer/test.afa.gz
contains: contains:
- ">CP025268.1" - ">CP025268.1"
- "aaa---UUGAAGAGUUUGAUCAUGGCUCAGAUUGAACGCUGGCGGCAGGCCUAACACAUG" - "aaa---UUGAAGAGUUUGAUCAUGGCUCAGAUUGAACGCUGGCGGCAGGCCUAACACAUG"
- path: output/hmmer/versions.yml - path: output/hmmer/versions.yml
contains: contains:
- "easel:" - "easel:"
# PHYLIP case: runs the test_hmmer_eslreformat_phylip entry workflow and checks
# that the output file carries the "phylip" suffix taken from ext.args and
# contains the expected upper-case alignment row for CP025268.1.
- name: hmmer eslreformat test_hmmer_eslreformat_phylip
command: nextflow run ./tests/modules/hmmer/eslreformat -entry test_hmmer_eslreformat_phylip -c ./tests/config/nextflow.config -c ./tests/modules/hmmer/eslreformat/nextflow.config
tags:
- hmmer/eslreformat
- hmmer
files:
# Output name is <prefix>.<suffix>.gz, with prefix "test" (meta.id) and suffix "phylip".
- path: output/hmmer/test.phylip.gz
contains:
- "CP025268.1 AAA---UUGAAGAGUUUGAUCAUGGCUCAGAUUGAACGCUGGCGGCAGGCCUAACACAUG"
# The versions file must report the easel tool.
- path: output/hmmer/versions.yml
contains:
- "easel:"
- name: hmmer eslreformat test_hmmer_eslreformat_unalign - name: hmmer eslreformat test_hmmer_eslreformat_unalign
command: nextflow run ./tests/modules/hmmer/eslreformat -entry test_hmmer_eslreformat_unalign -c ./tests/config/nextflow.config -c ./tests/modules/hmmer/eslreformat/nextflow.config command: nextflow run ./tests/modules/hmmer/eslreformat -entry test_hmmer_eslreformat_unalign -c ./tests/config/nextflow.config -c ./tests/modules/hmmer/eslreformat/nextflow.config
tags: tags:
- hmmer/eslreformat - hmmer/eslreformat
- hmmer - hmmer
files: files:
- path: output/hmmer/test.sequences.gz - path: output/hmmer/test.afa.gz
contains: contains:
- ">CP025268.1" - ">CP025268.1"
- "aaaUUGAAGAGUUUGAUCAUGGCUCAGAUUGAACGCUGGCGGCAGGCCUAACACAUG" - "aaaUUGAAGAGUUUGAUCAUGGCUCAGAUUGAACGCUGGCGGCAGGCCUAACACAUG"