From 53b324281f9bdf0b8aa7aeae8410a65a7c5a0553 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida
Date: Tue, 1 Feb 2022 11:47:01 +0100
Subject: [PATCH] Raven assembler (#1087)

Adding a new module for [Raven](https://github.com/lbcb-sci/raven), a De novo genome assembler for long uncorrected reads.
---
 Reviewer notes (commentary only; not part of the commit message or of any hunk):

 * modules/raven/main.nf -- process RAVEN
   - Input is `tuple val(meta), path(reads)`; outputs are the gzipped FASTA,
     the gzipped GFA (both emitted together with `meta`) and versions.yml.
   - `prefix` is task.ext.prefix, falling back to meta.id; `args` is
     task.ext.args, falling back to the empty string.
   - raven is run with `-t $task.cpus`; its standard output is piped through
     `gzip -c` into ${prefix}.fasta.gz.
   - raven is asked to write the graph to ${prefix}.gfa
     (--graphical-fragment-assembly); a second `gzip -c ... >` step then
     produces ${prefix}.gfa.gz from that file.
   - versions.yml records the output of `raven --version` under the
     "${task.process}" key.
   - The conda spec (used only when params.enable_conda is set) and both
     container images are pinned to raven-assembler 1.6.1. The Singularity
     image URL is chosen only when workflow.containerEngine == 'singularity'
     and task.ext.singularity_pull_docker_container is falsy; otherwise the
     quay.io image is used.

 * modules/raven/meta.yml
   - NOTE(review): the `doi` value carries a `doi.org/` prefix -- confirm
     whether consumers of this file expect a bare DOI instead.
   - NOTE(review): the meta-map example shows suffix:'bacteria' while the
     test workflow passes single_end:false -- confirm which key is intended.

 * tests/modules/raven
   - test_raven passes
     params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz']
     as the reads input.
   - nextflow.config derives publishDir from task.process: last ':'-separated
     part, then its first '_'-separated token, lower-cased, under
     params.outdir.
   - test.yml pins an md5sum for both gzip outputs. NOTE(review): this
     presumably relies on the gzip in the execution environment not embedding
     a file name/timestamp in the header -- confirm (or consider `gzip -n`).

 modules/raven/main.nf               | 39 ++++++++++++++++++++++++
 modules/raven/meta.yml              | 47 +++++++++++++++++++++++++++++
 tests/config/pytest_modules.yml     |  4 +++
 tests/modules/raven/main.nf         | 15 +++++++++
 tests/modules/raven/nextflow.config |  5 +++
 tests/modules/raven/test.yml        |  9 ++++++
 6 files changed, 119 insertions(+)
 create mode 100644 modules/raven/main.nf
 create mode 100644 modules/raven/meta.yml
 create mode 100644 tests/modules/raven/main.nf
 create mode 100644 tests/modules/raven/nextflow.config
 create mode 100644 tests/modules/raven/test.yml

diff --git a/modules/raven/main.nf b/modules/raven/main.nf
new file mode 100644
index 00000000..0d81b94b
--- /dev/null
+++ b/modules/raven/main.nf
@@ -0,0 +1,39 @@
+process RAVEN {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::raven-assembler=1.6.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/raven-assembler:1.6.1--h2e03b76_0' :
+        'quay.io/biocontainers/raven-assembler:1.6.1--h2e03b76_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.fasta.gz"), emit: fasta
+    tuple val(meta), path("*.gfa.gz")  , emit: gfa
+    path "versions.yml"                , emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # run tool
+    raven \\
+        -t $task.cpus \\
+        --graphical-fragment-assembly ${prefix}.gfa \\
+        $args \\
+        $reads | \\
+        gzip -c > ${prefix}.fasta.gz
+
+    # compress assembly graph
+    gzip -c ${prefix}.gfa > ${prefix}.gfa.gz
+
+    # get tool version
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        raven: \$( raven --version )
+    END_VERSIONS
+    """
+}
diff --git a/modules/raven/meta.yml b/modules/raven/meta.yml
new file mode 100644
index 00000000..644907a9
--- /dev/null
+++ b/modules/raven/meta.yml
@@ -0,0 +1,47 @@
+name: raven
+description: De novo genome assembler for long uncorrected reads.
+keywords:
+  - de novo
+  - assembly
+  - genome
+  - genome assembler
+  - long uncorrected reads
+tools:
+  - raven:
+      description: Raven is a de novo genome assembler for long uncorrected reads.
+      homepage: https://github.com/lbcb-sci/raven
+      documentation: https://github.com/lbcb-sci/raven#usage
+      tool_dev_url: https://github.com/lbcb-sci/raven
+      doi: doi.org/10.1038/s43588-021-00073-4
+      licence: ['MIT']
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', suffix:'bacteria' ]
+  - reads:
+      type: file
+      description: Input file in FASTA/FASTQ format.
+      pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', suffix:'bacteria' ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - fasta:
+      type: file
+      description: Assembled FASTA file
+      pattern: "*.fasta.gz"
+  - gfa:
+      type: file
+      description: Repeat graph
+      pattern: "*.gfa.gz"
+
+authors:
+  - "@fmalmeida"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 06ef1649..fe39bcc5 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1205,6 +1205,10 @@ rasusa:
   - modules/rasusa/**
   - tests/modules/rasusa/**
 
+raven:
+  - modules/raven/**
+  - tests/modules/raven/**
+
 raxmlng:
   - modules/raxmlng/**
   - tests/modules/raxmlng/**
diff --git a/tests/modules/raven/main.nf b/tests/modules/raven/main.nf
new file mode 100644
index 00000000..01b04519
--- /dev/null
+++ b/tests/modules/raven/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { RAVEN } from '../../../modules/raven/main.nf'
+
+workflow test_raven {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        [ file(params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz'], checkIfExists: true) ]
+    ]
+
+    RAVEN ( input )
+}
diff --git a/tests/modules/raven/nextflow.config b/tests/modules/raven/nextflow.config
new file mode 100644
index 00000000..8730f1c4
--- /dev/null
+++ b/tests/modules/raven/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/raven/test.yml b/tests/modules/raven/test.yml
new file mode 100644
index 00000000..bbede642
--- /dev/null
+++ b/tests/modules/raven/test.yml
@@ -0,0 +1,9 @@
+- name: raven test_raven
+  command: nextflow run ./tests/modules/raven -entry test_raven -c ./tests/config/nextflow.config -c ./tests/modules/raven/nextflow.config
+  tags:
+    - raven
+  files:
+    - path: output/raven/test.fasta.gz
+      md5sum: b026b6aba793a9f2bbfb17f732c91926
+    - path: output/raven/test.gfa.gz
+      md5sum: 09053490495f6deb7a3941e559cda626