Raven assembler (#1087)

Add a new module for [Raven](https://github.com/lbcb-sci/raven), a de novo genome assembler for long uncorrected reads.
This commit is contained in:
Felipe Marques de Almeida 2022-02-01 11:47:01 +01:00 committed by GitHub
parent a74e8436cc
commit 53b324281f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 119 additions and 0 deletions

39
modules/raven/main.nf Normal file
View file

@ -0,0 +1,39 @@
// Assemble long uncorrected reads with raven.
//
// Input : tuple of (meta map, reads file(s)); only meta.id is read here.
// Output: gzipped assembly FASTA, gzipped assembly graph (GFA) and a
//         versions.yml recording the raven version that was run.
process RAVEN {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::raven-assembler=1.6.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/raven-assembler:1.6.1--h2e03b76_0' :
        'quay.io/biocontainers/raven-assembler:1.6.1--h2e03b76_0' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.fasta.gz"), emit: fasta
    tuple val(meta), path("*.gfa.gz")  , emit: gfa
    path "versions.yml"                , emit: versions

    script:
    // Extra command-line options and the output file prefix can be
    // overridden per task through task.ext; the prefix defaults to meta.id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    # run tool: the assembly is read from raven's stdout and compressed on
    # the fly, the assembly graph is written to ${prefix}.gfa.
    # gzip -n leaves the original file name and timestamp out of the gzip
    # header, so the compressed bytes do not depend on when the task ran.
    raven \\
        -t $task.cpus \\
        --graphical-fragment-assembly ${prefix}.gfa \\
        $args \\
        $reads | \\
        gzip -n -c > ${prefix}.fasta.gz

    # compress assembly graph (the uncompressed .gfa is kept alongside it)
    gzip -n -c ${prefix}.gfa > ${prefix}.gfa.gz

    # get tool version
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        raven: \$( raven --version )
    END_VERSIONS
    """
}

47
modules/raven/meta.yml Normal file
View file

@ -0,0 +1,47 @@
# Module metadata for the raven process: describes the wrapped tool and
# every input and output channel element.
name: raven
description: De novo genome assembler for long uncorrected reads.
keywords:
  - de novo
  - assembly
  - genome
  - genome assembler
  - long uncorrected reads
tools:
  - raven:
      description: Raven is a de novo genome assembler for long uncorrected reads.
      homepage: https://github.com/lbcb-sci/raven
      documentation: https://github.com/lbcb-sci/raven#usage
      tool_dev_url: https://github.com/lbcb-sci/raven
      # Bare DOI identifier (no resolver host), quoted so it stays a string.
      doi: "10.1038/s43588-021-00073-4"
      licence: ['MIT']
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: Input file in FASTA/FASTQ format.
      pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - fasta:
      type: file
      description: Assembled FASTA file
      pattern: "*.fasta.gz"
  - gfa:
      type: file
      description: Assembly graph in GFA format
      pattern: "*.gfa.gz"
authors:
  - "@fmalmeida"

View file

@ -1205,6 +1205,10 @@ rasusa:
- modules/rasusa/**
- tests/modules/rasusa/**
# Path globs covering the raven module sources and its tests.
# NOTE(review): presumably used to decide which module tests to run when
# these paths change — confirm against the CI configuration.
raven:
- modules/raven/**
- tests/modules/raven/**
raxmlng:
- modules/raxmlng/**
- tests/modules/raxmlng/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { RAVEN } from '../../../modules/raven/main.nf'
// Runs the RAVEN process once on the Bacteroides fragilis nanopore
// FASTQ referenced by params.test_data.
workflow test_raven {

    // meta map
    sample_meta = [ id:'test', single_end:false ]

    // Reads are passed as a one-element list; checkIfExists makes a
    // missing test file fail here rather than inside the process.
    nanopore_reads = [
        file(params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz'], checkIfExists: true)
    ]

    RAVEN ( [ sample_meta, nanopore_reads ] )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish each task's outputs under <params.outdir>/<tool>, where <tool>
    // is derived from the process name: keep the part after the last ':',
    // then the part before the first '_', lower-cased.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}

View file

@ -0,0 +1,9 @@
# Runs the test_raven entry workflow and checks both published outputs.
# NOTE(review): the output/raven/ paths assume params.outdir resolves to
# "output" in tests/config/nextflow.config — confirm.
- name: raven test_raven
  command: nextflow run ./tests/modules/raven -entry test_raven -c ./tests/config/nextflow.config -c ./tests/modules/raven/nextflow.config
  tags:
    - raven
  files:
    - path: output/raven/test.fasta.gz
      md5sum: b026b6aba793a9f2bbfb17f732c91926
    - path: output/raven/test.gfa.gz
      md5sum: 09053490495f6deb7a3941e559cda626