new tool: staden_io_lib (#1499)

* new tool: staden_io_lib

* update docker containers

* add test.yml

* add fai index input

* typo

* fix version.yml

* update md5sum

* omit md5sum for cram

* move scramble to submodule

* add missing in/output

* remove some comments

Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com>
This commit is contained in:
Matthias De Smet 2022-04-08 15:54:15 +02:00 committed by GitHub
parent f57f085912
commit e19a9a2474
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 150 additions and 0 deletions

View file

@ -0,0 +1,61 @@
// Converts an alignment file (SAM/BAM/CRAM) to another format with io_lib's `scramble`.
// The output format defaults to CRAM and can be switched with "-O sam" / "-O bam" in task.ext.args.
process STADENIOLIB_SCRAMBLE {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::staden_io_lib=1.14.14" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/staden_io_lib:1.14.14--h0d9da7e_3' :
        'quay.io/biocontainers/staden_io_lib:1.14.14--h0d9da7e_3' }"

    input:
    tuple val(meta), path(reads)
    path(fasta)
    path(fai)
    path(gzi)

    output:
    // NOTE(review): only "*.cram" is captured here, while the script can also write
    // ${prefix}.sam / ${prefix}.bam when "-O sam" / "-O bam" is requested - confirm
    // whether those runs are expected to satisfy this (non-optional) output.
    tuple val(meta), path("*.cram") ,emit: cram
    path "*.gzi"                    ,emit: gzi, optional: true
    path "versions.yml"             ,emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // The input format handed to `-I` is taken from the file extension of the reads file.
    def inputformat = reads.getExtension()

    // Substring search: `"-O sam" in args` on a String is an equality test in Groovy,
    // so it would only match when ext.args held nothing but that flag.
    def outputformat = "cram"
    if (args.contains("-O sam")) {
        outputformat = "sam"
    } else if (args.contains("-O bam")) {
        outputformat = "bam"
    }

    // A reference is only passed when both the FASTA and its index were supplied.
    def reference = fasta && fai ? "--r ${fasta}" : ""
    if (outputformat == "cram" && !reference) {
        error "Cannot convert to CRAM without a reference"
    }

    // Only warn when a gzip index was actually supplied but cannot be used.
    // NOTE(review): gz_index is not interpolated into the command below, so a supplied
    // index currently has no effect - confirm the intended scramble flag before wiring it in.
    def gz_index = gzi ? "--g ${gzi}" : ""
    if (gz_index && (outputformat == "cram" || outputformat == "sam")) {
        gz_index = ""
        log.warn "Cannot use gzip index for CRAM or SAM output"
    }

    """
    scramble \
        $args \
        -I ${inputformat} \
        $reference \
        -t $task.cpus \
        ${reads} \
        ${prefix}.${outputformat}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        stadeniolib: \$(echo \$(scramble -h | head -n 1 |sed 's/^.*version //'))
    END_VERSIONS
    """
}

View file

@ -0,0 +1,58 @@
# Module metadata for the STADENIOLIB_SCRAMBLE process: describes the wrapped tool
# and documents each input and output channel of the process.
name: "stadeniolib_scramble"
description: Advanced sequence file format conversions
keywords:
  - sam
  - bam
  - cram
  - compression
tools:
  - "scramble":
      description: "Staden Package 'io_lib' (sometimes referred to as libstaden-read by distributions). This contains code for reading and writing a variety of Bioinformatics / DNA Sequence formats."
      homepage: "https://github.com/jkbonfield/io_lib"
      documentation: "https://github.com/jkbonfield/io_lib/blob/master/README.md"
      tool_dev_url: "https://github.com/jkbonfield/io_lib"
      licence: ["BSD"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - fasta:
      type: file
      description: Reference genome in FASTA format
      pattern: "*.{fa,fasta}"
  - fai:
      type: file
      description: FASTA index file from samtools faidx
      pattern: "*.fai"
  - gzi:
      type: file
      description: Optional gzip index file for BAM inputs
      pattern: "*.gzi"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # Named after the process's `cram` emit, which collects "*.cram".
  - cram:
      type: file
      description: Converted reads
      pattern: "*.cram"
  - gzi:
      type: file
      description: Optional gzip index file for BAM outputs
      pattern: "*.gzi"

authors:
  - "@matthdsm"

View file

@ -1707,6 +1707,10 @@ ssuissero:
- modules/ssuissero/**
- tests/modules/ssuissero/**
# stadeniolib/scramble: glob patterns covering the module's own directory and its tests directory.
stadeniolib/scramble:
- modules/stadeniolib/scramble/**
- tests/modules/stadeniolib/scramble/**
staphopiasccmec:
- modules/staphopiasccmec/**
- tests/modules/staphopiasccmec/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { STADENIOLIB_SCRAMBLE } from '../../../../modules/stadeniolib/scramble/main.nf'
// Runs STADENIOLIB_SCRAMBLE on the sarscov2 paired-end test BAM, supplying the genome
// FASTA and its .fai index, and an empty list in place of the gzi input.
workflow test_stadeniolib {

    sample_meta = [ id:'test', single_end:false ] // meta map
    bam_file    = file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
    genome      = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    genome_fai  = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)

    STADENIOLIB_SCRAMBLE (
        [ sample_meta, bam_file ],
        genome,
        genome_fai,
        []
    )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish into <params.outdir>/<tool>, where <tool> is derived from the process name:
    // keep the last ':'-separated segment, then its first '_'-separated token, lowercased.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}

View file

@ -0,0 +1,7 @@
# Runs the test_stadeniolib entry workflow and checks that the converted CRAM and
# versions.yml are present under output/stadeniolib/.
# The run path points at the scramble test directory and the submodule tag mirrors the
# "stadeniolib/scramble" path-filter key.
- name: stadeniolib test_stadeniolib
  command: nextflow run tests/modules/stadeniolib/scramble -entry test_stadeniolib -c tests/config/nextflow.config
  tags:
    - stadeniolib
    - stadeniolib/scramble
  files:
    - path: output/stadeniolib/test.cram
    - path: output/stadeniolib/versions.yml