From dc95e67e153ad937b869d90229333c0654628912 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 6 Apr 2022 08:18:23 +0200 Subject: [PATCH] New tool: biobambam/bamsormadup (#1478) * add bamsormadup * fix yaml * add test.yml * Update tests/modules/biobambam/bamsormadup/test.yml Co-authored-by: James A. Fellows Yates * test meta.yaml: remove md5sums * Tool bamsormadup: - add (optional) reference input - add bam index ouput - add cram output option - make metrics output: more general * fix input and output formats * update input file description * drop sam output, goes against nf-core regs; add input check for cram files * fix typo * Update modules/biobambam/bamsormadup/main.nf Co-authored-by: James A. Fellows Yates * improve ref fasta name * fix if else shorthand * fix syntax error * kind of fix tests * set fixed suffix for metrics file to keep it in line with picard and bammarkduplicates2 * fix command line * update test.yml * add support for multiple input bams * Update modules/biobambam/bamsormadup/meta.yml Co-authored-by: James A. Fellows Yates * Update modules/biobambam/bamsormadup/meta.yml Co-authored-by: James A. Fellows Yates * Update tests/modules/biobambam/bamsormadup/test.yml Co-authored-by: James A. Fellows Yates Co-authored-by: James A. 
Fellows Yates
---
 modules/biobambam/bamsormadup/main.nf         | 53 +++++++++++++++++++
 modules/biobambam/bamsormadup/meta.yml        | 52 ++++++++++++++++++
 tests/config/pytest_modules.yml               |  4 ++
 tests/modules/biobambam/bamsormadup/main.nf   | 15 ++++++
 .../biobambam/bamsormadup/nextflow.config     |  5 ++
 tests/modules/biobambam/bamsormadup/test.yml  | 11 ++++
 6 files changed, 140 insertions(+)
 create mode 100644 modules/biobambam/bamsormadup/main.nf
 create mode 100644 modules/biobambam/bamsormadup/meta.yml
 create mode 100644 tests/modules/biobambam/bamsormadup/main.nf
 create mode 100644 tests/modules/biobambam/bamsormadup/nextflow.config
 create mode 100644 tests/modules/biobambam/bamsormadup/test.yml

diff --git a/modules/biobambam/bamsormadup/main.nf b/modules/biobambam/bamsormadup/main.nf
new file mode 100644
index 00000000..b9e28e43
--- /dev/null
+++ b/modules/biobambam/bamsormadup/main.nf
@@ -0,0 +1,53 @@
+// Concatenates one or more BAM files with bamcat and pipes the stream into
+// bamsormadup, writing <prefix>.bam (or <prefix>.cram) and <prefix>.metrics.txt.
+// `fasta` is optional (pass [] to omit it) but mandatory when ext.args asks for CRAM.
+process BIOBAMBAM_BAMSORMADUP {
+    tag "$meta.id"
+    label "process_medium"
+
+    conda (params.enable_conda ? "bioconda::biobambam=2.0.183" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/biobambam:2.0.183--h9f5acd7_1' : 'quay.io/biocontainers/biobambam:2.0.183--h9f5acd7_1'}"
+
+    input:
+    tuple val(meta), path(bams)
+    path(fasta)
+
+    output:
+    tuple val(meta), path("*.{bam,cram}")   ,emit: bam
+    tuple val(meta), path("*.bam.bai")      ,optional:true, emit: bam_index
+    tuple val(meta), path("*.metrics.txt")  ,emit: metrics
+    path "versions.yml"                     ,emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def suffix = args.contains("outputformat=cram") ? "cram" : "bam"
+    // A single staged file is a Path, not a List; join() is only valid for several BAMs.
+    def input_string = bams instanceof List ? bams.join(" I=") : bams
+    // An omitted reference arrives as [] (falsy); forward it only when a file was staged.
+    def reference = fasta ? "reference=${fasta}" : ""
+
+    if (args.contains("outputformat=cram") && !fasta) error "Reference required for CRAM output."
+
+    """
+    bamcat \\
+        I=${input_string} \\
+        level=0 \\
+    | bamsormadup \\
+        $args \\
+        $reference \\
+        M=${prefix}.metrics.txt \\
+        tmpfile=$prefix \\
+        threads=$task.cpus \\
+        > ${prefix}.${suffix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bamcat: \$(echo \$(bamsormadup --version 2>&1) | sed 's/^This is biobambam2 version //; s/..biobambam2 is .*\$//' )
+        bamsormadup: \$(echo \$(bamsormadup --version 2>&1) | sed 's/^This is biobambam2 version //; s/..biobambam2 is .*\$//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/biobambam/bamsormadup/meta.yml b/modules/biobambam/bamsormadup/meta.yml
new file mode 100644
index 00000000..39acf3b3
--- /dev/null
+++ b/modules/biobambam/bamsormadup/meta.yml
@@ -0,0 +1,52 @@
+name: biobambam_bamsormadup
+description: Parallel sorting and duplicate marking
+keywords:
+  - markduplicates
+  - sort
+  - bam
+  - cram
+tools:
+  - biobambam:
+      description: |
+        biobambam is a set of tools for early stage alignment file processing.
+      homepage: https://gitlab.com/german.tischler/biobambam2
+      documentation: https://gitlab.com/german.tischler/biobambam2/-/blob/master/README.md
+      doi: 10.1186/1751-0473-9-13
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bams:
+      type: file
+      description: List containing 1 or more bam files
+  - fasta:
+      type: file
+      description: Reference genome in FASTA format (optional)
+      pattern: "*.{fa,fasta}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM file with duplicate reads marked/removed
+      pattern: "*.{bam,cram}"
+  - bam_index:
+      type: file
+      description: BAM index file
+      pattern: "*.{bai}"
+  - metrics:
+      type: file
+      description: Duplicate metrics file generated by biobambam
+      pattern: "*.{metrics.txt}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@matthdsm"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 6d66f230..d47b95c4 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -214,6 +214,10 @@ biobambam/bammarkduplicates2:
   - modules/biobambam/bammarkduplicates2/**
   - tests/modules/biobambam/bammarkduplicates2/**
 
+biobambam/bamsormadup:
+  - modules/biobambam/bamsormadup/**
+  - tests/modules/biobambam/bamsormadup/**
+
 biscuit/align:
   - modules/biscuit/index/**
   - modules/biscuit/align/**
diff --git a/tests/modules/biobambam/bamsormadup/main.nf b/tests/modules/biobambam/bamsormadup/main.nf
new file mode 100644
index 00000000..741a4433
--- /dev/null
+++ b/tests/modules/biobambam/bamsormadup/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { BIOBAMBAM_BAMSORMADUP } from '../../../../modules/biobambam/bamsormadup/main.nf'
+
+workflow test_biobambam_bamsormadup {
+
+    input = [ // tuple for the module's first input: [ meta, bams ]
+        [ id:'test', single_end:false ], // meta map
+        [file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)],
+    ]
+
+    BIOBAMBAM_BAMSORMADUP ( input, [] ) // [] leaves the optional fasta input empty
+}
diff --git a/tests/modules/biobambam/bamsormadup/nextflow.config b/tests/modules/biobambam/bamsormadup/nextflow.config
new file mode 100644
index 00000000..8730f1c4
--- /dev/null
+++ b/tests/modules/biobambam/bamsormadup/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } // last ':'-separated part of the process name, up to its first '_', lowercased
+
+}
diff --git a/tests/modules/biobambam/bamsormadup/test.yml b/tests/modules/biobambam/bamsormadup/test.yml
new file mode 100644
index 00000000..a7a14202
--- /dev/null
+++ b/tests/modules/biobambam/bamsormadup/test.yml
@@ -0,0 +1,11 @@
+- name: biobambam bamsormadup test_biobambam_bamsormadup
+  command: nextflow run tests/modules/biobambam/bamsormadup -entry test_biobambam_bamsormadup -c tests/config/nextflow.config
+  tags:
+    - biobambam/bamsormadup
+    - biobambam
+  files:
+    - path: output/biobambam/test.bam
+      md5sum: 243a77fb0642fd46bb16a4d3432d19dc
+    - path: output/biobambam/test.metrics.txt
+      md5sum: 1721879bea1f3888ecd33b35e6ee0e72
+    - path: output/biobambam/versions.yml # listed without an md5sum entry