New tool: biobambam/bamsormadup (#1478)

* add bamsormadup

* fix yaml

* add test.yml

* Update tests/modules/biobambam/bamsormadup/test.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* test meta.yaml: remove md5sums

* Tool bamsormadup:
- add (optional) reference input
- add bam index output
- add cram output option
- make metrics output more general

* fix input and output formats

* update input file description

* drop sam output, goes against nf-core regs; add input check for cram files

* fix typo

* Update modules/biobambam/bamsormadup/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* improve ref fasta name

* fix if else shorthand

* fix syntax error

* kind of fix tests

* set fixed suffix for metrics file to keep it in line with picard and bammarkduplicates2

* fix command line

* update test.yml

* add support for multiple input bams

* Update modules/biobambam/bamsormadup/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/biobambam/bamsormadup/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update tests/modules/biobambam/bamsormadup/test.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Matthias De Smet 2022-04-06 08:18:23 +02:00 committed by GitHub
parent 797ce3254e
commit dc95e67e15
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 133 additions and 0 deletions

View file

@ -0,0 +1,46 @@
// Concatenates one or more BAM files with `bamcat` and pipes the stream into
// `bamsormadup`, writing a BAM (default) or CRAM file plus a metrics file.
//
// Inputs:
//   meta  - Groovy map with sample information; `meta.id` is the default output prefix
//   bams  - one or more BAM files
//   fasta - reference FASTA, or `[]` when no reference is supplied
// Outputs:
//   bam       - `${prefix}.bam` or `${prefix}.cram`, depending on `outputformat=` in task.ext.args
//   bam_index - optional `*.bam.bai`
//   metrics   - `${prefix}.metrics.txt`
//   versions  - versions.yml
process BIOBAMBAM_BAMSORMADUP {
    tag "$meta.id"
    label "process_medium"

    conda (params.enable_conda ? "bioconda::biobambam=2.0.183" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/biobambam:2.0.183--h9f5acd7_1' : 'quay.io/biocontainers/biobambam:2.0.183--h9f5acd7_1'}"

    input:
    tuple val(meta), path(bams)
    path(fasta)

    output:
    tuple val(meta), path("*.{bam,cram}")   ,emit: bam
    tuple val(meta), path("*.bam.bai")      ,optional:true, emit: bam_index
    tuple val(meta), path("*.metrics.txt")  ,emit: metrics
    path "versions.yml"                     ,emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The output extension follows the format requested through task.ext.args.
    def suffix = args.contains("outputformat=cram") ? "cram" : "bam"
    // A single staged file may arrive as a lone path rather than a list; only
    // join with " I=" when there really are several inputs.
    def input_string = bams instanceof List ? bams.join(" I=") : bams
    // An absent reference is passed as `[]` (falsy), so test truthiness instead
    // of comparing against null.
    // NOTE(review): bamsormadup documents that the FASTA needs a .fai index
    // next to it for CRAM I/O - confirm callers stage one.
    def reference = fasta ? "reference=${fasta}" : ""
    if (args.contains("outputformat=cram") && !fasta) error "Reference required for CRAM output."
    """
    bamcat \\
        I=${input_string} \\
        level=0 \\
    | bamsormadup \\
        $args \\
        $reference \\
        M=${prefix}.metrics.txt \\
        tmpfile=$prefix \\
        threads=$task.cpus \\
        > ${prefix}.${suffix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bamcat: \$(echo \$(bamsormadup --version 2>&1) | sed 's/^This is biobambam2 version //; s/..biobambam2 is .*\$//' )
        bamsormadup: \$(echo \$(bamsormadup --version 2>&1) | sed 's/^This is biobambam2 version //; s/..biobambam2 is .*\$//' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,52 @@
# Module metadata for biobambam/bamsormadup: describes the tool and the
# module's input and output channels.
name: biobambam_bamsormadup
description: Parallel sorting and duplicate marking
keywords:
  - markduplicates
  - sort
  - bam
  - cram
tools:
  - biobambam:
      description: |
        biobambam is a set of tools for early stage alignment file processing.
      homepage: https://gitlab.com/german.tischler/biobambam2
      documentation: https://gitlab.com/german.tischler/biobambam2/-/blob/master/README.md
      doi: "10.1186/1751-0473-9-13"
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bams:
      type: file
      description: List containing 1 or more bam files
  - fasta:
      type: file
      description: Reference genome in FASTA format (optional)
      pattern: "*.{fa,fasta}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM/CRAM file with duplicate reads marked/removed
      pattern: "*.{bam,cram}"
  # Patterns below mirror the output globs declared in the module's main.nf.
  - bam_index:
      type: file
      description: BAM index file
      pattern: "*.bam.bai"
  - metrics:
      type: file
      description: Duplicate metrics file generated by biobambam
      pattern: "*.metrics.txt"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@matthdsm"

View file

@ -214,6 +214,10 @@ biobambam/bammarkduplicates2:
- modules/biobambam/bammarkduplicates2/**
- tests/modules/biobambam/bammarkduplicates2/**
# Path globs covering the biobambam/bamsormadup module and its tests.
# NOTE(review): presumably used to select which module tests run when these
# paths change - confirm against the CI configuration.
biobambam/bamsormadup:
- modules/biobambam/bamsormadup/**
- tests/modules/biobambam/bamsormadup/**
biscuit/align:
- modules/biscuit/index/**
- modules/biscuit/align/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BIOBAMBAM_BAMSORMADUP } from '../../../../modules/biobambam/bamsormadup/main.nf'
// Runs BIOBAMBAM_BAMSORMADUP on two BAM files with no reference FASTA.
workflow test_biobambam_bamsormadup {

    // NOTE(review): the two BAMs come from different test-data sets
    // (sarscov2 and homo_sapiens) - confirm that mixing them is intended.
    bam_files = [
        file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    ]

    // [ meta map, list of BAM files ]
    sample = [
        [ id:'test', single_end:false ],
        bam_files
    ]

    // The empty list stands in for the optional reference input.
    BIOBAMBAM_BAMSORMADUP ( sample, [] )
}

View file

@ -0,0 +1,5 @@
// Test configuration applied to every process in this test run.
process {
// Publish into "<params.outdir>/<tool>", where <tool> is derived from the
// process name: keep the last ':'-separated component, take the part before
// the first '_' and lower-case it (e.g. BIOBAMBAM_BAMSORMADUP -> biobambam).
// The closure is evaluated per task, since it reads task.process.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,11 @@
# Test case for the bamsormadup test workflow: runs the entry workflow and
# checks the published outputs (BAM and metrics by checksum, versions.yml by
# presence only).
- name: biobambam bamsormadup test_biobambam_bamsormadup
  command: >-
    nextflow run tests/modules/biobambam/bamsormadup
    -entry test_biobambam_bamsormadup
    -c tests/config/nextflow.config
  tags:
    - biobambam/bamsormadup
    - biobambam
  files:
    - path: output/biobambam/test.bam
      md5sum: 243a77fb0642fd46bb16a4d3432d19dc
    - path: output/biobambam/test.metrics.txt
      md5sum: 1721879bea1f3888ecd33b35e6ee0e72
    - path: output/biobambam/versions.yml