From 6702d2e14555076a106f6d04081b35ab85e25210 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Tue, 5 Jul 2022 09:36:22 +0200
Subject: [PATCH] Add MEGAN/DAA2INFO (#1848)

* Add daa2info

* Add right flag in the config

* Fix config

* Apply suggestions from code review

Co-authored-by: Jose Espinosa-Carrasco

Co-authored-by: Jose Espinosa-Carrasco
---
Review note: this patch was edited by hand after export. The version probe in
modules/megan/daa2info/main.nf now calls daa2info, and licence in meta.yml is a
YAML list. The blob ids on the "index" lines were not regenerated.

 modules/megan/daa2info/main.nf               | 43 ++++++++++++++++
 modules/megan/daa2info/meta.yml              | 52 ++++++++++++++++++++
 tests/config/pytest_modules.yml              |  4 ++
 tests/modules/megan/daa2info/main.nf         | 21 ++++++++
 tests/modules/megan/daa2info/nextflow.config |  9 ++++
 tests/modules/megan/daa2info/test.yml        | 10 ++++
 6 files changed, 139 insertions(+)
 create mode 100644 modules/megan/daa2info/main.nf
 create mode 100644 modules/megan/daa2info/meta.yml
 create mode 100644 tests/modules/megan/daa2info/main.nf
 create mode 100644 tests/modules/megan/daa2info/nextflow.config
 create mode 100644 tests/modules/megan/daa2info/test.yml

diff --git a/modules/megan/daa2info/main.nf b/modules/megan/daa2info/main.nf
new file mode 100644
index 00000000..1c535aa9
--- /dev/null
+++ b/modules/megan/daa2info/main.nf
@@ -0,0 +1,43 @@
+// Analyses a DIAMOND DAA file with MEGAN's daa2info and exports the information
+// as compressed text (*.txt.gz), plus an optional MEGAN summary file (*.megan).
+process MEGAN_DAA2INFO {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::megan=6.21.7" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/megan:6.21.7--h9ee0642_0':
+        'quay.io/biocontainers/megan:6.21.7--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(daa)
+    val(megan_summary)
+
+    output:
+    tuple val(meta), path("*.txt.gz")               , emit: txt_gz
+    tuple val(meta), path("*.megan"), optional: true, emit: megan
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // -es is only passed on request, which is why the *.megan output is optional
+    def summary = megan_summary ? "-es ${prefix}.megan" : ""
+    // The MEGAN version is parsed from what daa2info prints when run without
+    // arguments, so it is reported by the same tool this process executes.
+    """
+    daa2info \\
+        -i ${daa} \\
+        -o ${prefix}.txt.gz \\
+        ${summary} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        megan: \$(echo \$(daa2info 2>&1) | grep version | sed 's/.*version //g;s/, built.*//g')
+    END_VERSIONS
+    """
+}
diff --git a/modules/megan/daa2info/meta.yml b/modules/megan/daa2info/meta.yml
new file mode 100644
index 00000000..0a6bb442
--- /dev/null
+++ b/modules/megan/daa2info/meta.yml
@@ -0,0 +1,52 @@
+name: "megan_daa2info"
+description: Analyses a DAA file and exports information in text format
+keywords:
+  - megan
+  - diamond
+  - daa
+  - classification
+  - conversion
+tools:
+  - "megan":
+      description: "A tool for studying the taxonomic content of a set of DNA reads"
+      homepage: "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/"
+      documentation: "https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html"
+      tool_dev_url: "https://github.com/husonlab/megan-ce"
+      doi: "10.1371/journal.pcbi.1004957"
+      licence: ["GPL >=3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - daa:
+      type: file
+      description: DAA file from DIAMOND
+      pattern: "*.daa"
+  - megan_summary:
+      type: boolean
+      description: Specify whether to generate a MEGAN summary file
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - txt_gz:
+      type: file
+      description: Compressed text file
+      pattern: "*.txt.gz"
+  - megan:
+      type: file
+      description: Optionally generated MEGAN summary file
+      pattern: "*.megan"
+
+authors:
+  - "@jfy133"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 5f78bd55..d7953cd1 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1378,6 +1378,10 @@ megahit:
   - modules/megahit/**
   - tests/modules/megahit/**
 
+megan/daa2info:
+  - modules/megan/daa2info/**
+  - tests/modules/megan/daa2info/**
+
 megan/rma2info:
   - modules/megan/rma2info/**
   - tests/modules/megan/rma2info/**
diff --git a/tests/modules/megan/daa2info/main.nf b/tests/modules/megan/daa2info/main.nf
new file mode 100644
index 00000000..840a14a2
--- /dev/null
+++ b/tests/modules/megan/daa2info/main.nf
@@ -0,0 +1,21 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { DIAMOND_MAKEDB } from '../../../../modules/diamond/makedb/main.nf'
+include { DIAMOND_BLASTX } from '../../../../modules/diamond/blastx/main.nf'
+include { MEGAN_DAA2INFO } from '../../../../modules/megan/daa2info/main.nf'
+
+// Generates a DAA file with DIAMOND blastx and passes it to MEGAN_DAA2INFO with the summary enabled
+workflow test_megan_daa2info {
+
+    db = [ file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true) ]
+    fasta = [ file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true) ]
+    out_ext = 'daa'
+    blast_columns = []
+    megan_summary = true
+
+    DIAMOND_MAKEDB ( db )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
+    MEGAN_DAA2INFO ( DIAMOND_BLASTX.out.daa, megan_summary )
+}
diff --git a/tests/modules/megan/daa2info/nextflow.config b/tests/modules/megan/daa2info/nextflow.config
new file mode 100644
index 00000000..b2f69d81
--- /dev/null
+++ b/tests/modules/megan/daa2info/nextflow.config
@@ -0,0 +1,9 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: MEGAN_DAA2INFO {
+        ext.args = "-l"
+    }
+
+}
diff --git a/tests/modules/megan/daa2info/test.yml b/tests/modules/megan/daa2info/test.yml
new file mode 100644
index 00000000..1dbdc3c9
--- /dev/null
+++ b/tests/modules/megan/daa2info/test.yml
@@ -0,0 +1,10 @@
+- name: megan daa2info test_megan_daa2info
+  command: nextflow run ./tests/modules/megan/daa2info -entry test_megan_daa2info -c ./tests/config/nextflow.config -c ./tests/modules/megan/daa2info/nextflow.config
+  tags:
+    - megan/daa2info
+    - megan
+  files:
+    - path: output/megan/test.megan
+      contains: ["@Creator"]
+    - path: output/megan/test.txt.gz
+      contains: ["# Number of reads: 1"]