New module: Samblaster (#954)

* add base code from samblaster

* added test yml

* fixing versions files, should this be the cause of online lint failures

* removed tmp files that shouldn't be there

* fixing output file name - 1

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* fixing output file name - 2

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* fixing output file name - 3

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* fixing output file name - 4

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* fixing output file name - 5

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* fixing output file name - 6

* fixed indent

* fixed input name and updated test.yml file with new name

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Francesco L 2021-11-04 18:18:56 +01:00 committed by GitHub
parent fc4f3e8822
commit 02892ef654
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 197 additions and 0 deletions

View file

@ -0,0 +1,78 @@
//
// Utility functions used in nf-core DSL2 module files
//
//
// Derive the lower-case software tool name from $task.process:
// take the last ':'-separated scope, then its first '_'-separated token
//
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]
    def tool_token   = process_name.tokenize('_')[0]
    return tool_token.toLowerCase()
}
//
// Return the module (process) name, i.e. the last ':'-separated
// component of $task.process
//
def getProcessName(task_process) {
    def scopes = task_process.tokenize(':')
    return scopes[-1]
}
//
// Build the Groovy Map of options understood by nf-core modules,
// substituting a default for every key the caller left unset.
// publish_files is copied through as-is (no default), so it stays null when absent.
//
def initOptions(Map args) {
    return [
        'args'            : args.args            ?: '',
        'args2'           : args.args2           ?: '',
        'args3'           : args.args3           ?: '',
        'publish_by_meta' : args.publish_by_meta ?: [],
        'publish_dir'     : args.publish_dir     ?: '',
        'publish_files'   : args.publish_files,
        'suffix'          : args.suffix          ?: ''
    ]
}
//
// Tidy up and join elements of a list to return a path string.
// Null, empty and whitespace-only entries are dropped; the remaining entries
// are trimmed, stripped of leading/trailing slashes and joined with '/'.
//
def getPathFromList(path_list) {
    // Groovy truth on the safe-navigated trim() result rejects null, '' and blank
    // entries without ever calling a method on null (the previous
    // `!item?.trim().isEmpty()` threw a NullPointerException for null entries)
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim whitespace, then strip leading and trailing slashes
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    return paths.join('/')
}
//
// Decide where (and whether) a module output file is published.
// Returns the relative publish path for args.filename, or null to skip publishing.
//
def saveFiles(Map args) {
    def opts = initOptions(args.options)

    // Do not publish versions.yml unless running from pytest workflow
    def is_versions_file = args.filename.equals('versions.yml')
    if (is_versions_file && !System.getenv("NF_CORE_MODULES_TEST")) {
        return null
    }

    // Base directory: the module option takes precedence over the caller-supplied default
    def segments = [ opts.publish_dir ?: args.publish_dir ]

    // Optionally append one path segment per requested meta key
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (meta_key in meta_keys) {
            if (!args.meta || !(meta_key instanceof String)) {
                continue
            }
            // Fall back to the key itself when the meta map has no such entry
            def segment = meta_key
            if (args.meta.containsKey(meta_key)) {
                def meta_value = args.meta[meta_key]
                segment = meta_value instanceof Boolean ? "${meta_key}_${meta_value}".toString() : meta_value
            }
            // Anything that is not a plain String contributes an empty segment
            segments.add(segment instanceof String ? segment : '')
        }
    }

    // A publish_files Map restricts publishing to matching file endings,
    // each with its own sub-directory; the first matching entry wins
    if (opts.publish_files instanceof Map) {
        def matched = opts.publish_files.find { entry -> args.filename.endsWith(entry.key) }
        if (matched != null) {
            def publish_segments = segments + [ matched.value ]
            return "${getPathFromList(publish_segments)}/$args.filename"
        }
        return null
    }

    // No publish_files given at all: publish everything
    if (opts.publish_files == null) {
        return "${getPathFromList(segments)}/$args.filename"
    }

    // Any other publish_files value (e.g. false) disables publishing
    return null
}

View file

@ -0,0 +1,42 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
// Module options default to an empty map; initOptions (imported from ./functions)
// is then used to obtain the options map read by the process below
params.options = [:]
options = initOptions(params.options)
// Streams a BAM file through samtools view -> samblaster -> samtools view,
// writing "${prefix}.bam" together with a versions.yml file
process SAMBLASTER {
tag "$meta.id"
label 'process_low'
// saveFiles maps each output filename to its publish path (or null to skip it);
// the publish directory is derived from the tool name and the 'id' meta key
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
conda (params.enable_conda ? "bioconda::samblaster=0.1.26 bioconda::samtools=1.14" : null)
// Use the depot.galaxyproject.org Singularity image when the container engine is
// singularity and params.singularity_pull_docker_container is not set;
// otherwise use the quay.io image
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:ba4a02b56f3e524a6e006bcd99fe8cc1d7fe09eb-0"
} else {
container "quay.io/biocontainers/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:ba4a02b56f3e524a6e006bcd99fe8cc1d7fe09eb-0"
}
input:
tuple val(meta), path(bam)
output:
tuple val(meta), path("*.bam"), emit: bam
path "versions.yml" , emit: versions
script:
// Output basename: meta.id, followed by options.suffix when one is provided
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
// Abort when the output file would carry the same name as the input file
if( "$bam" == "${prefix}.bam" ) error "Input and output names are the same, use the suffix option to disambiguate"
// Command template: "samtools view -h" passes the header along to samblaster,
// whose output is piped into a second samtools view that writes ${prefix}.bam.
// options.args2 / options.args / options.args3 are passed to the first samtools,
// samblaster and the second samtools respectively. The heredoc records the
// samblaster and samtools versions in versions.yml.
"""
samtools view -h $options.args2 $bam | \\
samblaster $options.args | \\
samtools view $options.args3 -Sb - >${prefix}.bam
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
${getSoftwareName(task.process)}: \$( samblaster -h 2>&1 | head -n 1 | sed 's/^samblaster: Version //' )
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

View file

@ -0,0 +1,53 @@
name: samblaster
description: |
This module combines samtools and samblaster in order to use
samblaster's ability to filter or tag SAM files, with the advantage
of keeping both input and output in BAM format.
Samblaster input must contain a sequence header: for this reason, the input
is piped through the "samtools view -h" command.
Additional arguments for samtools can be passed using:
options.args2 for the input BAM file, and
options.args3 for the output BAM file.
keywords:
- sort
tools:
- samblaster:
description: |
samblaster is a fast and flexible program for marking duplicates in read-id grouped paired-end SAM files.
It can also optionally output discordant read pairs and/or split read mappings to separate SAM files,
and/or unmapped/clipped reads to a separate FASTQ file.
By default, samblaster reads SAM input from stdin and writes SAM to stdout.
homepage: None
documentation: https://github.com/GregoryFaust/samblaster
tool_dev_url: https://github.com/GregoryFaust/samblaster
doi: "10.1093/bioinformatics/btu314"
licence: ['MIT']
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM file
pattern: "*.bam"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- bam:
type: file
description: Tagged or filtered BAM file
pattern: "*.bam"
authors:
- "@lescai"

View file

@ -1019,6 +1019,10 @@ salmon/quant:
- modules/salmon/quant/**
- tests/modules/salmon/quant/**
samblaster:
- modules/samblaster/**
- tests/modules/samblaster/**
samtools/ampliconclip:
- modules/samtools/ampliconclip/**
- tests/modules/samtools/ampliconclip/**

View file

@ -0,0 +1,13 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SAMBLASTER } from '../../../modules/samblaster/main.nf' addParams( options: [args: "-M --addMateTags", suffix:'.processed'] )
workflow test_samblaster {
    // Sample metadata map and the unsorted paired-end UMI BAM taken from params.test_data
    meta = [ id:'test', single_end:false ]
    bam  = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_bam'], checkIfExists: true)

    // The process expects a single [ meta, bam ] tuple
    SAMBLASTER ( [ meta, bam ] )
}

View file

@ -0,0 +1,7 @@
- name: samblaster test_samblaster
command: nextflow run tests/modules/samblaster -entry test_samblaster -c tests/config/nextflow.config
tags:
- samblaster
files:
- path: output/samblaster/test.processed.bam
md5sum: 950f23d85f75be1cf872f45c0144bdf4