From 74b569b66a06ce9d62f30396b6b201a68bd4bb36 Mon Sep 17 00:00:00 2001
From: Daniel Lundin
Date: Thu, 8 Sep 2022 10:05:57 +0200
Subject: [PATCH] Add epang/place (#2023)

* Created epang module from template

* Module seems to be working

* Namechange to epang_place

* Move model to meta map

* Fix linting problems

* Prettier

* Forgot --threads!

* Moved module to epang

* Move reference file params

* Directory output

* Make all args optional

* Added inputs for three other types of file arguments

* Update modules/epang/main.nf

Co-authored-by: James A. Fellows Yates

* Add directory to output doc, reorder the other

* Make model specification less hardcoded

Co-authored-by: James A. Fellows Yates
---
 modules/epang/main.nf               | 56 +++++++++++++++++++++++
 modules/epang/meta.yml              | 70 +++++++++++++++++++++++++++++
 tests/config/pytest_modules.yml     |  4 ++
 tests/modules/epang/main.nf         | 22 +++++++++
 tests/modules/epang/nextflow.config |  7 +++
 tests/modules/epang/test.yml        | 12 +++++
 6 files changed, 171 insertions(+)
 create mode 100644 modules/epang/main.nf
 create mode 100644 modules/epang/meta.yml
 create mode 100644 tests/modules/epang/main.nf
 create mode 100644 tests/modules/epang/nextflow.config
 create mode 100644 tests/modules/epang/test.yml

diff --git a/modules/epang/main.nf b/modules/epang/main.nf
new file mode 100644
index 00000000..8eaaa226
--- /dev/null
+++ b/modules/epang/main.nf
@@ -0,0 +1,56 @@
+process EPANG {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda (params.enable_conda ? "bioconda::epa-ng=0.3.8" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/epa-ng:0.3.8--h9a82719_1':
+        'quay.io/biocontainers/epa-ng:0.3.8--h9a82719_1' }"
+
+    input:
+    tuple val(meta), path(queryaln) // meta map + query alignment; passed as --query when non-empty
+    path referencealn               // passed as --ref-msa when non-empty; supply [] to omit
+    path referencetree              // passed as --tree when non-empty; supply [] to omit
+    path bfastfile                  // passed as --bfast when non-empty; supply [] to omit
+    path splitfile                  // passed as --split when non-empty; supply [] to omit
+    path binaryfile                 // passed as --binary when non-empty; rejected together with referencealn/referencetree
+
+    output:
+    tuple val(meta), path("./.")    , emit: epang   , optional: true // directory in which epa-ng was run
+    path "*.epa_result.jplace"      , emit: jplace  , optional: true // only renamed/emitted if epa-ng wrote epa_result.jplace
+    path "*.epa_info.log"           , emit: log
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args       = task.ext.args ?: ''
+    def prefix     = task.ext.prefix ?: "${meta.id}"
+    def queryarg   = queryaln      ? "--query $queryaln"       : ""
+    def refalnarg  = referencealn  ? "--ref-msa $referencealn" : ""
+    def reftreearg = referencetree ? "--tree $referencetree"   : ""
+    def bfastarg   = bfastfile     ? "--bfast $bfastfile"      : ""
+    def splitarg   = splitfile     ? "--split $splitfile"      : ""
+    def binaryarg  = binaryfile    ? "--binary $binaryfile"    : ""
+    if ( binaryfile && ( referencealn || referencetree ) ) error "[EPANG] Cannot supply both binary and reference MSA or reference tree. Check input"
+    """
+    epa-ng \\
+        $args \\
+        --threads $task.cpus \\
+        $queryarg \\
+        $refalnarg \\
+        $reftreearg \\
+        $bfastarg \\
+        $splitarg \\
+        $binaryarg
+
+    [ -e epa_result.jplace ] && mv epa_result.jplace ${prefix}.epa_result.jplace
+    [ -e epa_info.log ]      && mv epa_info.log ${prefix}.epa_info.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        epang: \$(echo \$(epa-ng --version 2>&1) | sed 's/^EPA-ng v//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/epang/meta.yml b/modules/epang/meta.yml
new file mode 100644
index 00000000..18357b9e
--- /dev/null
+++ b/modules/epang/meta.yml
@@ -0,0 +1,70 @@
+name: "epang"
+description: phylogenetic placement of query sequences in a reference tree
+keywords:
+  - phylogeny
+  - phylogenetic placement
+  - sequences
+tools:
+  - "epang":
+      description: "Massively parallel phylogenetic placement of genetic sequences"
+      homepage: "https://github.com/Pbdas/epa-ng"
+      documentation: "https://github.com/Pbdas/epa-ng/wiki/Full-Stack-Example"
+      tool_dev_url: "https://github.com/Pbdas/epa-ng"
+      doi: "https://doi.org/10.1093/sysbio/syy054"
+      licence: "['GNU Affero General Public License v3.0']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - queryaln:
+      type: file
+      description: aligned query sequences in any supported format including phylip and fasta, may be gzipped
+      pattern: "*"
+  - referencealn:
+      type: file
+      description: reference alignment in any supported format including phylip and fasta, may be gzipped
+      pattern: "*"
+  - referencetree:
+      type: file
+      description: newick file containing the reference tree in which query sequences will be placed
+      pattern: "*"
+  - bfastfile:
+      type: file
+      description: file argument to the --bfast parameter
+      pattern: "*"
+  - splitfile:
+      type: file
+      description: file argument to the --split parameter
+      pattern: "*"
+  - binaryfile:
+      type: file
+      description: file argument to the --binary parameter
+      pattern: "*"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - epang:
+      type: directory
+      description: directory in which EPA-NG was run
+  - jplace:
+      type: file
+      description: file with placement information
+      pattern: "*.jplace"
+  - log:
+      type: file
+      description: log file from placement
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@erikrikarddaniel"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index c264b919..81c184ba 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -719,6 +719,10 @@ entrezdirect/xtract:
   - modules/entrezdirect/xtract/**
   - tests/modules/entrezdirect/xtract/**
 
+epang:
+  - modules/epang/**
+  - tests/modules/epang/**
+
 expansionhunter:
   - modules/expansionhunter/**
   - tests/modules/expansionhunter/**
diff --git a/tests/modules/epang/main.nf b/tests/modules/epang/main.nf
new file mode 100644
index 00000000..d2c64ad8
--- /dev/null
+++ b/tests/modules/epang/main.nf
@@ -0,0 +1,22 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { EPANG } from '../../../modules/epang/main.nf'
+
+workflow test_epang {
+
+    input = [
+        [ id:'test', model:'LG' ], // meta map
+        file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/query.alnfaa.gz', checkIfExists: true)
+    ]
+
+    EPANG (
+        input,
+        file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/reference.alnfaa.gz', checkIfExists: true),
+        file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/epang/reference.newick', checkIfExists: true),
+        [],
+        [],
+        []
+    )
+}
diff --git a/tests/modules/epang/nextflow.config b/tests/modules/epang/nextflow.config
new file mode 100644
index 00000000..71784eb5
--- /dev/null
+++ b/tests/modules/epang/nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    ext.args = { " --model ${meta.model} " }
+
+}
diff --git a/tests/modules/epang/test.yml b/tests/modules/epang/test.yml
new file mode 100644
index 00000000..84630b1f
--- /dev/null
+++ b/tests/modules/epang/test.yml
@@ -0,0 +1,12 @@
+- name: epang test_epang
+  command: nextflow run ./tests/modules/epang -entry test_epang -c ./tests/config/nextflow.config -c ./tests/modules/epang/nextflow.config
+  tags:
+    - epang
+  files:
+    - path: output/epang/test.epa_info.log
+      contains:
+        - "INFO 3 Sequences done"
+    - path: output/epang/test.epa_result.jplace
+      contains:
+        - '"placements":'
+        - '"metadata": {"invocation": "epa-ng --model LG --threads 2 --query query.alnfaa.gz --ref-msa reference.alnfaa.gz --tree reference.newick "}'