Add epang/place (#2023)

* Created epang module from template

* Module seems to be working

* Namechange to epang_place

* Move model to meta map

* Fix linting problems

* Prettier

* Forgot --threads!

* Moved module to epang

* Move reference file params

* Directory output

* Make all args optional

* Added inputs for three other types of file arguments

* Update modules/epang/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Add directory to output doc, reorder the other

* Make model specification less hardcoded

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Daniel Lundin 2022-09-08 10:05:57 +02:00 committed by GitHub
parent 31166227d3
commit 74b569b66a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 171 additions and 0 deletions

56
modules/epang/main.nf Normal file
View file

@ -0,0 +1,56 @@
// EPANG: run `epa-ng` to place aligned query sequences into a reference tree.
//
// Inputs (every path input may be passed as `[]`, in which case the matching
// command line option is simply left out):
//   meta          - Groovy map with sample information; `meta.id` is used as tag and default prefix
//   queryaln      - file passed to --query
//   referencealn  - file passed to --ref-msa
//   referencetree - file passed to --tree
//   bfastfile     - file passed to --bfast
//   splitfile     - file passed to --split
//   binaryfile    - file passed to --binary (rejected together with referencealn/referencetree)
//
// Outputs:
//   epang    - the task directory itself, paired with meta (optional)
//   jplace   - <prefix>.epa_result.jplace (optional)
//   log      - <prefix>.epa_info.log
//   versions - versions.yml with the epa-ng version
process EPANG {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::epa-ng=0.3.8" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/epa-ng:0.3.8--h9a82719_1':
        'quay.io/biocontainers/epa-ng:0.3.8--h9a82719_1' }"

    input:
    tuple val(meta), path(queryaln)
    path referencealn
    path referencetree
    path bfastfile
    path splitfile
    path binaryfile

    output:
    tuple val(meta), path("./.")  , emit: epang   , optional: true
    path "*.epa_result.jplace"    , emit: jplace  , optional: true
    path "*.epa_info.log"         , emit: log
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Reject the unsupported input combination before anything else is assembled.
    if ( binaryfile && ( referencealn || referencetree ) ) error "[EPANG] Cannot supply both binary and reference MSA or reference tree. Check input"

    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // One option string per file input; an empty input contributes an empty string.
    def query_opt    = queryaln      ? "--query $queryaln"        : ""
    def ref_msa_opt  = referencealn  ? "--ref-msa $referencealn"  : ""
    def ref_tree_opt = referencetree ? "--tree $referencetree"    : ""
    def bfast_opt    = bfastfile     ? "--bfast $bfastfile"       : ""
    def split_opt    = splitfile     ? "--split $splitfile"       : ""
    def binary_opt   = binaryfile    ? "--binary $binaryfile"     : ""
    """
    epa-ng \\
        $args \\
        --threads $task.cpus \\
        $query_opt \\
        $ref_msa_opt \\
        $ref_tree_opt \\
        $bfast_opt \\
        $split_opt \\
        $binary_opt

    [ -e epa_result.jplace ] && mv epa_result.jplace ${prefix}.epa_result.jplace
    [ -e epa_info.log ] && mv epa_info.log ${prefix}.epa_info.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        epang: \$(echo \$(epa-ng --version 2>&1) | sed 's/^EPA-ng v//')
    END_VERSIONS
    """
}

70
modules/epang/meta.yml Normal file
View file

@ -0,0 +1,70 @@
# Module metadata for EPANG: describes the tool plus every input and output
# channel declared by the process.
name: "epang"
description: phylogenetic placement of query sequences in a reference tree
keywords:
  - phylogeny
  - phylogenetic placement
  - sequences
tools:
  - "epang":
      description: "Massively parallel phylogenetic placement of genetic sequences"
      homepage: "https://github.com/Pbdas/epa-ng"
      documentation: "https://github.com/Pbdas/epa-ng/wiki/Full-Stack-Example"
      tool_dev_url: "https://github.com/Pbdas/epa-ng"
      doi: "https://doi.org/10.1093/sysbio/syy054"
      # A real YAML sequence, not a quoted string that only looks like a list.
      licence: ["GNU Affero General Public License v3.0"]

# All file inputs are optional on the command line: an empty value leaves the
# corresponding epa-ng option out.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - queryaln:
      type: file
      description: aligned query sequences in any supported format including phylip and fasta, may be gzipped
      pattern: "*"
  - referencealn:
      type: file
      description: reference alignment in any supported format including phylip and fasta, may be gzipped
      pattern: "*"
  - referencetree:
      type: file
      description: newick file containing the reference tree in which query sequences will be placed
      pattern: "*"
  - bfastfile:
      type: file
      description: file argument to the --bfast parameter
      pattern: "*"
  - splitfile:
      type: file
      description: file argument to the --split parameter
      pattern: "*"
  - binaryfile:
      type: file
      description: file argument to the --binary parameter
      pattern: "*"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - epang:
      type: directory
      description: directory in which EPA-NG was run
  - jplace:
      type: file
      description: file with placement information
      pattern: "*.jplace"
  - log:
      type: file
      description: log file from placement
      pattern: "*.log"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@erikrikarddaniel"

View file

@ -719,6 +719,10 @@ entrezdirect/xtract:
- modules/entrezdirect/xtract/** - modules/entrezdirect/xtract/**
- tests/modules/entrezdirect/xtract/** - tests/modules/entrezdirect/xtract/**
# Both patterns need the trailing `**` so that files inside the directories match.
epang:
  - modules/epang/**
  - tests/modules/epang/**
expansionhunter:
  - modules/expansionhunter/**
  - tests/modules/expansionhunter/**

View file

@ -0,0 +1,22 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { EPANG } from '../../../modules/epang/main.nf'
// Test entry point: runs EPANG with a query alignment, a reference alignment
// and a reference tree, leaving the bfast, split and binary inputs empty.
workflow test_epang {

    // Meta map plus the gzipped query alignment. The `model` entry is not used
    // by the workflow itself; the accompanying test config reads it via ext.args.
    query = [
        [ id:'test', model:'LG' ], // meta map
        file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/query.alnfaa.gz', checkIfExists: true)
    ]

    reference_alignment = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/reference.alnfaa.gz', checkIfExists: true)
    reference_tree      = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/reference.newick', checkIfExists: true)

    // Argument order: query tuple, reference MSA, reference tree, bfast, split, binary.
    EPANG ( query, reference_alignment, reference_tree, [], [], [] )
}

View file

@ -0,0 +1,7 @@
// Process-scope settings for the EPANG module test.
process {

    // Publish under <outdir>/<name>, where <name> is the last ':'-separated
    // component of the process name, cut at the first '_' and lower-cased.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        "${params.outdir}/${simple_name.tokenize('_')[0].toLowerCase()}"
    }

    // Take the substitution model from the meta map of each task.
    ext.args = { " --model ${meta.model} " }
}

View file

@ -0,0 +1,12 @@
# Runs the test_epang workflow and checks the content of the published log
# and jplace files.
- name: epang test_epang
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/epang
    -entry test_epang
    -c ./tests/config/nextflow.config
    -c ./tests/modules/epang/nextflow.config
  tags:
    - epang
  files:
    - path: output/epang/test.epa_info.log
      contains:
        - 'INFO 3 Sequences done'
    - path: output/epang/test.epa_result.jplace
      contains:
        - '"placements":'
        # Pins the exact command line recorded by epa-ng in the result metadata.
        - '"metadata": {"invocation": "epa-ng --model LG --threads 2 --query query.alnfaa.gz --ref-msa reference.alnfaa.gz --tree reference.newick "}'