diff --git a/modules/flye/main.nf b/modules/flye/main.nf
new file mode 100644
index 00000000..61d88fcb
--- /dev/null
+++ b/modules/flye/main.nf
@@ -0,0 +1,73 @@
+// Assemble long reads (PacBio or Oxford Nanopore) de novo with Flye.
+// `mode` must be one of the Flye read-type flags listed in `valid_mode` below.
+process FLYE {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda (params.enable_conda ? "bioconda::flye=2.9" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1' :
+        'quay.io/biocontainers/flye:2.9--py39h6935b12_1' }"
+
+    input:
+    tuple val(meta), path(reads)
+    val mode
+
+    output:
+    tuple val(meta), path("*.fasta.gz"), emit: fasta
+    tuple val(meta), path("*.gfa.gz")  , emit: gfa
+    tuple val(meta), path("*.gv.gz")   , emit: gv
+    tuple val(meta), path("*.txt")     , emit: txt
+    tuple val(meta), path("*.log")     , emit: log
+    tuple val(meta), path("*.json")    , emit: json
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def valid_mode = ["--pacbio-raw", "--pacbio-corr", "--pacbio-hifi", "--nano-raw", "--nano-corr", "--nano-hq"]
+    // Reject an unsupported read-type flag before the command string is built
+    if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to run Flye. Options: ${valid_mode.join(', ')}" }
+    """
+    flye \\
+        $mode \\
+        $reads \\
+        --out-dir . \\
+        --threads \\
+        $task.cpus \\
+        $args
+
+    # Compress the assembly and graphs, and give every result the sample prefix
+    gzip -c assembly.fasta > ${prefix}.assembly.fasta.gz
+    gzip -c assembly_graph.gfa > ${prefix}.assembly_graph.gfa.gz
+    gzip -c assembly_graph.gv > ${prefix}.assembly_graph.gv.gz
+    mv assembly_info.txt ${prefix}.assembly_info.txt
+    mv flye.log ${prefix}.flye.log
+    mv params.json ${prefix}.params.json
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        flye: \$( flye --version )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # Pipe into gzip directly: a file written and read within one pipeline is a race
+    echo stub | gzip -c > ${prefix}.assembly.fasta.gz
+    echo stub | gzip -c > ${prefix}.assembly_graph.gfa.gz
+    echo stub | gzip -c > ${prefix}.assembly_graph.gv.gz
+    echo contig_1 > ${prefix}.assembly_info.txt
+    echo stub > ${prefix}.flye.log
+    echo stub > ${prefix}.params.json
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        flye: \$( flye --version )
+    END_VERSIONS
+    """
+}
diff --git a/modules/flye/meta.yml b/modules/flye/meta.yml
new file mode 100644
index 00000000..239c920b
--- /dev/null
+++ b/modules/flye/meta.yml
@@ -0,0 +1,69 @@
+name: "flye"
+description: De novo assembler for single molecule sequencing reads
+keywords:
+  - assembly
+  - genome
+  - de novo
+  - genome assembler
+  - single molecule
+tools:
+  - "flye":
+      description: "Fast and accurate de novo assembler for single molecule sequencing reads"
+      homepage: "https://github.com/fenderglass/Flye"
+      documentation: "https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md"
+      tool_dev_url: "https://github.com/fenderglass/Flye"
+      doi: "10.1038/s41592-020-00971-x"
+      licence: "['BSD-3-clause']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - reads:
+      type: file
+      description: Input reads from Oxford Nanopore or PacBio data in FASTA/FASTQ format.
+      pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}"
+  - mode:
+      type: value
+      description: Flye mode depending on the input data (source and error rate)
+      pattern: "--pacbio-raw|--pacbio-corr|--pacbio-hifi|--nano-raw|--nano-corr|--nano-hq"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - fasta:
+      type: file
+      description: Assembled FASTA file
+      pattern: "*.fasta.gz"
+  - gfa:
+      type: file
+      description: Repeat graph in gfa format
+      pattern: "*.gfa.gz"
+  - gv:
+      type: file
+      description: Repeat graph in gv format
+      pattern: "*.gv.gz"
+  - txt:
+      type: file
+      description: Extra information and statistics about resulting contigs
+      pattern: "*.txt"
+  - log:
+      type: file
+      description: Flye log file
+      pattern: "*.log"
+  - json:
+      type: file
+      description: Flye parameters
+      pattern: "*.json"
+
+authors:
+  - "@mirpedrol"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 3eba2e8c..3981ea36 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -739,6 +739,10 @@ flash:
   - modules/flash/**
   - tests/modules/flash/**
 
+flye:
+  - modules/flye/**
+  - tests/modules/flye/**
+
 freebayes:
   - modules/freebayes/**
   - tests/modules/freebayes/**
diff --git a/tests/modules/flye/main.nf b/tests/modules/flye/main.nf
new file mode 100644
index 00000000..c078c4da
--- /dev/null
+++ b/tests/modules/flye/main.nf
@@ -0,0 +1,71 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FLYE } from '../../../modules/flye/main.nf'
+
+workflow test_flye_pacbio_raw {
+
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)
+    ]
+    mode = "--pacbio-raw"
+
+    FLYE ( input, mode )
+}
+
+workflow test_flye_pacbio_corr {
+
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)
+    ]
+    mode = "--pacbio-corr"
+
+    FLYE ( input, mode )
+}
+
+workflow test_flye_pacbio_hifi {
+
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)
+    ]
+    mode = "--pacbio-hifi"
+
+    FLYE ( input, mode )
+}
+
+workflow test_flye_nano_raw {
+
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)
+    ]
+    mode = "--nano-raw"
+
+    FLYE ( input, mode )
+}
+
+workflow test_flye_nano_corr {
+
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)
+    ]
+    mode = "--nano-corr"
+
+    FLYE ( input, mode )
+}
+
+workflow test_flye_nano_hq {
+
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)
+    ]
+    mode = "--nano-hq"
+
+    FLYE ( input, mode )
+}
diff --git a/tests/modules/flye/nextflow.config b/tests/modules/flye/nextflow.config
new file mode 100644
index 00000000..8730f1c4
--- /dev/null
+++ b/tests/modules/flye/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/flye/test.yml b/tests/modules/flye/test.yml
new file mode 100644
index 00000000..0c0f8766
--- /dev/null
+++ b/tests/modules/flye/test.yml
@@ -0,0 +1,85 @@
+# According to the issue https://github.com/fenderglass/Flye/issues/164
+# Some fluctuations are expected because of the heuristics
+# Here we check that test.assembly_info.txt contains at least one contig
+
+- name: flye test_flye_pacbio_raw
+  command: nextflow run ./tests/modules/flye -entry test_flye_pacbio_raw -c ./tests/config/nextflow.config -c ./tests/modules/flye/nextflow.config -stub-run
+  tags:
+    - flye
+  files:
+    - path: output/flye/test.assembly.fasta.gz
+    - path: output/flye/test.assembly_graph.gfa.gz
+    - path: output/flye/test.assembly_graph.gv.gz
+    - path: output/flye/test.assembly_info.txt
+      contains: ["contig_1"]
+    - path: output/flye/test.flye.log
+    - path: output/flye/test.params.json
+
+- name: flye test_flye_pacbio_corr
+  command: nextflow run ./tests/modules/flye -entry test_flye_pacbio_corr -c ./tests/config/nextflow.config -c ./tests/modules/flye/nextflow.config
+  tags:
+    - flye
+  files:
+    - path: output/flye/test.assembly.fasta.gz
+    - path: output/flye/test.assembly_graph.gfa.gz
+    - path: output/flye/test.assembly_graph.gv.gz
+    - path: output/flye/test.assembly_info.txt
+      contains: ["contig_1"]
+    - path: output/flye/test.flye.log
+    - path: output/flye/test.params.json
+      md5sum: 54b576cb6d4d27656878a7fd3657bde9
+
+- name: flye test_flye_pacbio_hifi
+  command: nextflow run ./tests/modules/flye -entry test_flye_pacbio_hifi -c ./tests/config/nextflow.config -c ./tests/modules/flye/nextflow.config
+  tags:
+    - flye
+  files:
+    - path: output/flye/test.assembly.fasta.gz
+    - path: output/flye/test.assembly_graph.gfa.gz
+    - path: output/flye/test.assembly_graph.gv.gz
+    - path: output/flye/test.assembly_info.txt
+      contains: ["contig_1"]
+    - path: output/flye/test.flye.log
+    - path: output/flye/test.params.json
+      md5sum: 54b576cb6d4d27656878a7fd3657bde9
+
+- name: flye test_flye_nano_raw
+  command: nextflow run ./tests/modules/flye -entry test_flye_nano_raw -c ./tests/config/nextflow.config -c ./tests/modules/flye/nextflow.config -stub-run
+  tags:
+    - flye
+  files:
+    - path: output/flye/test.assembly.fasta.gz
+    - path: output/flye/test.assembly_graph.gfa.gz
+    - path: output/flye/test.assembly_graph.gv.gz
+    - path: output/flye/test.assembly_info.txt
+      contains: ["contig_1"]
+    - path: output/flye/test.flye.log
+    - path: output/flye/test.params.json
+
+- name: flye test_flye_nano_corr
+  command: nextflow run ./tests/modules/flye -entry test_flye_nano_corr -c ./tests/config/nextflow.config -c ./tests/modules/flye/nextflow.config
+  tags:
+    - flye
+  files:
+    - path: output/flye/test.assembly.fasta.gz
+    - path: output/flye/test.assembly_graph.gfa.gz
+    - path: output/flye/test.assembly_graph.gv.gz
+    - path: output/flye/test.assembly_info.txt
+      contains: ["contig_1"]
+    - path: output/flye/test.flye.log
+    - path: output/flye/test.params.json
+      md5sum: 54b576cb6d4d27656878a7fd3657bde9
+
+- name: flye test_flye_nano_hq
+  command: nextflow run ./tests/modules/flye -entry test_flye_nano_hq -c ./tests/config/nextflow.config -c ./tests/modules/flye/nextflow.config
+  tags:
+    - flye
+  files:
+    - path: output/flye/test.assembly.fasta.gz
+    - path: output/flye/test.assembly_graph.gfa.gz
+    - path: output/flye/test.assembly_graph.gv.gz
+    - path: output/flye/test.assembly_info.txt
+      contains: ["contig_1"]
+    - path: output/flye/test.flye.log
+    - path: output/flye/test.params.json
+      md5sum: 54b576cb6d4d27656878a7fd3657bde9