From 3f14b1fabe51e71bfa7460cff6bb96932e4d5d51 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Wed, 24 Mar 2021 00:57:41 -0400 Subject: [PATCH] Update shovill module (#337) * add shovill module * removed deprecated shovill module * fix line ending in functions.nf * fix yaml indentation * reduce spades memory usage * update md5sums * remove non reproducible md5sum * Update test.yml Remove last non-reproducible md5sum. * purely cosmetic order change * proper process label, improve output readability Co-authored-by: Florian Wuennemann --- deprecated/shovill/main.nf | 20 --------- deprecated/shovill/meta.yml | 30 ------------- deprecated/shovill/test/main.nf | 17 ------- deprecated/shovill/test/nextflow.config | 5 --- software/shovill/functions.nf | 60 +++++++++++++++++++++++++ software/shovill/main.nf | 47 +++++++++++++++++++ software/shovill/meta.yml | 59 ++++++++++++++++++++++++ tests/config/pytest_software.yml | 4 ++ tests/software/shovill/main.nf | 52 +++++++++++++++++++++ tests/software/shovill/test.yml | 55 +++++++++++++++++++++++ 10 files changed, 277 insertions(+), 72 deletions(-) delete mode 100644 deprecated/shovill/main.nf delete mode 100644 deprecated/shovill/meta.yml delete mode 100644 deprecated/shovill/test/main.nf delete mode 100644 deprecated/shovill/test/nextflow.config create mode 100644 software/shovill/functions.nf create mode 100644 software/shovill/main.nf create mode 100644 software/shovill/meta.yml create mode 100644 tests/software/shovill/main.nf create mode 100644 tests/software/shovill/test.yml diff --git a/deprecated/shovill/main.nf b/deprecated/shovill/main.nf deleted file mode 100644 index b47975c1..00000000 --- a/deprecated/shovill/main.nf +++ /dev/null @@ -1,20 +0,0 @@ -process shovill { - - tag "$shovill" - - publishDir "${params.outdir}", pattern: '*.fasta', mode: 'copy' - - container "quay.io/biocontainers/shovill:1.0.9--0" - - input: - tuple val(sample_id), path(forward), path(reverse) - - output: - path 
"${sample_id}.fasta" - - script: - """ - shovill --R1 ${forward} --R2 ${reverse} --outdir shovill_out - mv shovill_out/contigs.fa ${sample_id}.fasta - """ -} diff --git a/deprecated/shovill/meta.yml b/deprecated/shovill/meta.yml deleted file mode 100644 index f955fa59..00000000 --- a/deprecated/shovill/meta.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Shovill -description: Create a bacterial assembly from paired fastq using shovill -keywords: - - Genome Assembly - - Bacterial Isolates -tools: - - fastqc: - description: | - Shovill assembles bacterial isolate genomes from Illumina - paired-end reads. Shovill uses the SPAdes genome assembler, - providing pre and post-processing to the SPAdes assembly. - It also supports SKESA, Velvet and Megahit. - homepage: https://github.com/tseemann/shovill - documentation: https://github.com/tseemann/shovill/blob/master/README.md -input: - - - - sample_id: - type: string - description: Sample identifier - - reads: - type: file - description: pair of fastq files -output: - - - - assembly: - type: file - description: fasta file - pattern: ${sample_id}.fasta -authors: - - "@annacprice" diff --git a/deprecated/shovill/test/main.nf b/deprecated/shovill/test/main.nf deleted file mode 100644 index 85ba6627..00000000 --- a/deprecated/shovill/test/main.nf +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.preview.dsl = 2 - -// import shovill -include {shovill} from '../main.nf' params(params) - -// define input channel -readsPath = '../../../test-datasets/tools/shovill/input/SRR3609257_{1,2}.fastq.gz' -Channel - .fromFilePairs( "${readsPath}", flat: true ) - .set{ ch_reads } - -// main workflow -workflow { - shovill(ch_reads) -} diff --git a/deprecated/shovill/test/nextflow.config b/deprecated/shovill/test/nextflow.config deleted file mode 100644 index 44cfb78d..00000000 --- a/deprecated/shovill/test/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -// docker -docker.enabled = true - -// output directory -params.outdir = 
'./results' diff --git a/software/shovill/functions.nf b/software/shovill/functions.nf new file mode 100644 index 00000000..f177f0c8 --- /dev/null +++ b/software/shovill/functions.nf @@ -0,0 +1,60 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) 
{ + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/software/shovill/main.nf b/software/shovill/main.nf new file mode 100644 index 00000000..fc472ea9 --- /dev/null +++ b/software/shovill/main.nf @@ -0,0 +1,47 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process SHOVILL { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? "bioconda::shovill=1.1.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/shovill:1.1.0--0" + } else { + container "quay.io/biocontainers/shovill:1.1.0--0" + } + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("contigs.fa") , emit: contigs + tuple val(meta), path("shovill.corrections") , emit: corrections + tuple val(meta), path("shovill.log") , emit: log + tuple val(meta), path("{skesa,spades,megahit,velvet}.fasta"), emit: raw_contigs + tuple val(meta), path("contigs.{fastg,gfa,LastGraph}") , optional:true, emit: gfa + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + def memory = task.memory.toGiga() + """ + shovill \\ + --R1 ${reads[0]} \\ + --R2 ${reads[1]} \\ + $options.args \\ + --cpus $task.cpus \\ + --ram $memory \\ + --outdir ./ \\ + --force + + echo \$(shovill --version 2>&1) | sed 's/^.*shovill //' > ${software}.version.txt + """ +} diff --git a/software/shovill/meta.yml b/software/shovill/meta.yml new file mode 100644 index 00000000..0a8661b1 --- /dev/null +++ b/software/shovill/meta.yml @@ -0,0 +1,59 @@ +name: shovill +## Description of the 
module and its keywords +description: Assemble bacterial isolate genomes from Illumina paired-end reads +keywords: + - bacterial + - assembly + - illumina + +tools: + - shovill: + ## Description and other details for the software + description: Microbial assembly pipeline for Illumina paired-end reads + homepage: https://github.com/tseemann/shovill + documentation: https://github.com/tseemann/shovill/blob/master/README.md + licence: ['GPL v2'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input paired-end FastQ files + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - contigs: + type: file + description: The final assembly produced by Shovill + pattern: "contigs.fa" + - corrections: + type: file + description: List of post-assembly corrections made by Shovill + pattern: "shovill.corrections" + - log: + type: file + description: Full log file for bug reporting + pattern: "shovill.log" + - raw_contigs: + type: file + description: Raw assembly produced by the assembler (SKESA, SPAdes, MEGAHIT, or Velvet) + pattern: "{skesa,spades,megahit,velvet}.fasta" + - gfa: + type: file + description: Assembly graph produced by MEGAHIT, SPAdes, or Velvet + pattern: "contigs.{fastg,gfa,LastGraph}" + +authors: + - "@rpetit3" diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml index 6398b5cb..5cb95c39 100755 --- a/tests/config/pytest_software.yml +++ b/tests/config/pytest_software.yml @@ -383,6 +383,10 @@ seqwish_induce: - software/seqwish/induce/** - tests/software/seqwish/induce/** +shovill: + - software/shovill/** + - tests/software/shovill/** + spades: - software/spades/** - tests/software/spades/** diff --git 
a/tests/software/shovill/main.nf b/tests/software/shovill/main.nf new file mode 100644 index 00000000..8b8a9d85 --- /dev/null +++ b/tests/software/shovill/main.nf @@ -0,0 +1,52 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SHOVILL } from '../../../software/shovill/main.nf' addParams( options: [args: '--gsize 2800000 --kmers 31'] ) +include { SHOVILL as SHOVILL_SKESA } from '../../../software/shovill/main.nf' addParams( options: [args: '--assembler skesa --gsize 2800000'] ) +include { SHOVILL as SHOVILL_MEGAHIT } from '../../../software/shovill/main.nf' addParams( options: [args: '--assembler megahit --gsize 2800000'] ) +include { SHOVILL as SHOVILL_VELVET } from '../../../software/shovill/main.nf' addParams( options: [args: '--assembler velvet --gsize 2800000'] ) + +workflow test_shovill { + + def input = [] + input = [ [ id:'test', single_end:false ], // meta map + [ file("https://github.com/nf-core/test-datasets/raw/bacass/ERR044595_1M_1.fastq.gz", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/bacass/ERR044595_1M_2.fastq.gz", checkIfExists: true) ] ] + + SHOVILL ( input ) + +} + +workflow test_shovill_megahit { + + def input = [] + input = [ [ id:'test', single_end:false ], // meta map + [ file("${launchDir}/tests/data/generic/fastq/test_R1.fastq.gz", checkIfExists: true), + file("${launchDir}/tests/data/generic/fastq/test_R2.fastq.gz", checkIfExists: true) ] ] + + SHOVILL_MEGAHIT ( input ) + +} + +workflow test_shovill_skesa { + + def input = [] + input = [ [ id:'test', single_end:false ], // meta map + [ file("${launchDir}/tests/data/generic/fastq/test_R1.fastq.gz", checkIfExists: true), + file("${launchDir}/tests/data/generic/fastq/test_R2.fastq.gz", checkIfExists: true) ] ] + + SHOVILL_SKESA ( input ) + +} + +workflow test_shovill_velvet { + + def input = [] + input = [ [ id:'test', single_end:false ], // meta map + [ file("${launchDir}/tests/data/generic/fastq/test_R1.fastq.gz", checkIfExists: true), + 
file("${launchDir}/tests/data/generic/fastq/test_R2.fastq.gz", checkIfExists: true) ] ] + + SHOVILL_VELVET ( input ) + +} diff --git a/tests/software/shovill/test.yml b/tests/software/shovill/test.yml new file mode 100644 index 00000000..a3072b3a --- /dev/null +++ b/tests/software/shovill/test.yml @@ -0,0 +1,55 @@ +- name: shovill with spades + command: nextflow run ./tests/software/shovill -entry test_shovill -c tests/config/nextflow.config + tags: + - shovill + - shovill_spades + files: + - path: output/shovill/contigs.gfa + md5sum: 277af5d8ae8a0d2432db59a316226828 + - path: output/shovill/spades.fasta + md5sum: 2506fc2af3a24397a3c5d3fbda0861fb + # MD5sum not reproducible (timestamp) + - path: output/shovill/contigs.fa + - path: output/shovill/shovill.corrections + - path: output/shovill/shovill.log + +- name: shovill with megahit + command: nextflow run ./tests/software/shovill -entry test_shovill_megahit -c tests/config/nextflow.config + tags: + - shovill + - shovill_megahit + files: + - path: output/shovill/shovill.corrections + md5sum: d41d8cd98f00b204e9800998ecf8427e + # MD5sum not reproducible (timestamp, contig order) + - path: output/shovill/contigs.fa + - path: output/shovill/contigs.fastg + - path: output/shovill/megahit.fasta + - path: output/shovill/shovill.log + +- name: shovill with skesa + command: nextflow run ./tests/software/shovill -entry test_shovill_skesa -c tests/config/nextflow.config + tags: + - shovill + - shovill_skesa + files: + - path: output/shovill/shovill.corrections + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/shovill/skesa.fasta + md5sum: c0c2d11b633310eb1437345cb599c33c + # MD5sum not reproducible (timestamp) + - path: output/shovill/contigs.fa + - path: output/shovill/shovill.log + +- name: shovill with velvet + command: nextflow run ./tests/software/shovill -entry test_shovill_velvet -c tests/config/nextflow.config + tags: + - shovill + - shovill_velvet + files: + # MD5sum not reproducible (timestamp, contig 
order) + - path: output/shovill/contigs.fa + - path: output/shovill/contigs.LastGraph + - path: output/shovill/shovill.corrections + - path: output/shovill/shovill.log + - path: output/shovill/velvet.fasta