From e19a9a2474c6609875b49d8140a7264e21a1beee Mon Sep 17 00:00:00 2001
From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
Date: Fri, 8 Apr 2022 15:54:15 +0200
Subject: [PATCH] new tool: staden_io_lib (#1499)

* new tool: staden_io_lib

* update docker containers

* add test.yml

* add fai index input

* typo

* fix version.yml

* update md5sum

* omit md5sum for cram

* move scramble to submodule

* add missing in/output

* remove some comments

Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com>
---
Review notes (not part of the commit message):

- main.nf: call reads.getExtension() as a method; the bare name is resolved
  as a property lookup.
- main.nf: detect "-O sam" / "-O bam" with String.contains(); Groovy's `in`
  against a String tests equality, so it never matched once ext.args held
  anything else.
- main.nf: replace the invalid `if cond : a else b` expressions with the
  ternary operator.
- main.nf: replace the undefined `warning` call with log.warn, and only warn
  when a gzi file was actually supplied.
- main.nf: widen the output glob so SAM/BAM conversions are collected; the
  emit name is unchanged.
- meta.yml: licence is now a YAML list, the converted-reads output is named
  after its emit (cram), and the glob patterns no longer contain spaces or
  lack a wildcard.
- test.yml: run the test from tests/modules/stadeniolib/scramble and add the
  stadeniolib/scramble tag used as the key in tests/config/pytest_modules.yml.
- Still open: gz_index is computed but not passed to scramble, and the
  double-dash spelling of --r / --g should be confirmed against the tool.
- The blob ids on the index lines were not regenerated for the edited files.

 modules/stadeniolib/scramble/main.nf          | 65 +++++++++++++++++++
 modules/stadeniolib/scramble/meta.yml         | 58 +++++++++++++++++
 tests/config/pytest_modules.yml               |  4 ++
 tests/modules/stadeniolib/scramble/main.nf    | 15 +++++
 .../stadeniolib/scramble/nextflow.config      |  5 ++
 tests/modules/stadeniolib/scramble/test.yml   |  8 +++
 6 files changed, 155 insertions(+)
 create mode 100644 modules/stadeniolib/scramble/main.nf
 create mode 100644 modules/stadeniolib/scramble/meta.yml
 create mode 100644 tests/modules/stadeniolib/scramble/main.nf
 create mode 100644 tests/modules/stadeniolib/scramble/nextflow.config
 create mode 100644 tests/modules/stadeniolib/scramble/test.yml

diff --git a/modules/stadeniolib/scramble/main.nf b/modules/stadeniolib/scramble/main.nf
new file mode 100644
index 00000000..e24fb2cb
--- /dev/null
+++ b/modules/stadeniolib/scramble/main.nf
@@ -0,0 +1,65 @@
+process STADENIOLIB_SCRAMBLE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::staden_io_lib=1.14.14" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/staden_io_lib:1.14.14--h0d9da7e_3' :
+        'quay.io/biocontainers/staden_io_lib:1.14.14--h0d9da7e_3' }"
+
+    input:
+    tuple val(meta), path(reads)
+    path(fasta)
+    path(fai)
+    path(gzi)
+
+    output:
+    // The emit name stays `cram` for callers; the glob also collects SAM/BAM when `-O` is set in ext.args
+    tuple val(meta), path("*.{cram,bam,sam}") ,emit: cram
+    path "*.gzi"                              ,emit: gzi, optional: true
+    path "versions.yml"                       ,emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    // Input format comes from the file extension; output format is CRAM unless ext.args requests SAM or BAM
+    def inputformat = reads.getExtension()
+    def outputformat = "cram"
+    if (args.contains("-O sam")) {
+        outputformat = "sam"
+    } else if (args.contains("-O bam")) {
+        outputformat = "bam"
+    }
+
+    // NOTE(review): single-letter flags are passed with a double dash here; confirm scramble accepts `--r`
+    def reference = fasta && fai ? "--r ${fasta}" : ""
+    if (outputformat == "cram" && !reference) {
+        error "Cannot convert to CRAM without a reference"
+    }
+
+    // NOTE(review): gz_index is validated but not yet passed to scramble below; confirm the flag before wiring it in
+    def gz_index = gzi ? "--g ${gzi}" : ""
+    if (gz_index && outputformat != "bam") {
+        gz_index = ""
+        log.warn "Cannot use gzip index for CRAM or SAM output"
+    }
+
+    """
+    scramble \
+        $args \
+        -I ${inputformat} \
+        $reference \
+        -t $task.cpus \
+        ${reads} \
+        ${prefix}.${outputformat}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        stadeniolib: \$(echo \$(scramble -h | head -n 1 |sed 's/^.*version //'))
+    END_VERSIONS
+    """
+}
diff --git a/modules/stadeniolib/scramble/meta.yml b/modules/stadeniolib/scramble/meta.yml
new file mode 100644
index 00000000..7e53a1b4
--- /dev/null
+++ b/modules/stadeniolib/scramble/meta.yml
@@ -0,0 +1,58 @@
+name: "stadeniolib_scramble"
+description: Advanced sequence file format conversions
+keywords:
+  - sam
+  - bam
+  - cram
+  - compression
+tools:
+  - "scramble":
+      description: "Staden Package 'io_lib' (sometimes referred to as libstaden-read by distributions). This contains code for reading and writing a variety of Bioinformatics / DNA Sequence formats."
+      homepage: "https://github.com/jkbonfield/io_lib"
+      documentation: "https://github.com/jkbonfield/io_lib/blob/master/README.md"
+      tool_dev_url: "https://github.com/jkbonfield/io_lib"
+      licence: ["BSD"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - fasta:
+      type: file
+      description: Reference genome in FASTA format
+      pattern: "*.{fa,fasta}"
+  - fai:
+      type: file
+      description: FASTA index file from samtools faidx
+      pattern: "*.fai"
+  - gzi:
+      type: file
+      description: Optional gzip index file for BAM inputs
+      pattern: "*.gzi"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - cram:
+      type: file
+      description: Converted reads (CRAM by default, SAM or BAM when requested with -O in ext.args)
+      pattern: "*.{cram,bam,sam}"
+  - gzi:
+      type: file
+      description: Optional gzip index file for BAM outputs
+      pattern: "*.gzi"
+authors:
+  - "@matthdsm"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 31f62c78..2b99f835 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1707,6 +1707,10 @@ ssuissero:
   - modules/ssuissero/**
   - tests/modules/ssuissero/**
 
+stadeniolib/scramble:
+  - modules/stadeniolib/scramble/**
+  - tests/modules/stadeniolib/scramble/**
+
 staphopiasccmec:
   - modules/staphopiasccmec/**
   - tests/modules/staphopiasccmec/**
diff --git a/tests/modules/stadeniolib/scramble/main.nf b/tests/modules/stadeniolib/scramble/main.nf
new file mode 100644
index 00000000..d29c6dd8
--- /dev/null
+++ b/tests/modules/stadeniolib/scramble/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { STADENIOLIB_SCRAMBLE } from '../../../../modules/stadeniolib/scramble/main.nf'
+
+workflow test_stadeniolib {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
+    ]
+
+    STADENIOLIB_SCRAMBLE ( input, file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true), [])
+}
diff --git a/tests/modules/stadeniolib/scramble/nextflow.config b/tests/modules/stadeniolib/scramble/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/stadeniolib/scramble/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/stadeniolib/scramble/test.yml b/tests/modules/stadeniolib/scramble/test.yml
new file mode 100644
index 00000000..cea6fb70
--- /dev/null
+++ b/tests/modules/stadeniolib/scramble/test.yml
@@ -0,0 +1,8 @@
+- name: stadeniolib test_stadeniolib
+  command: nextflow run tests/modules/stadeniolib/scramble -entry test_stadeniolib -c tests/config/nextflow.config
+  tags:
+    - stadeniolib
+    - stadeniolib/scramble
+  files:
+    - path: output/stadeniolib/test.cram
+    - path: output/stadeniolib/versions.yml