From 1765225042d40cf01eb9469ea95bf6d0f6810937 Mon Sep 17 00:00:00 2001 From: "Maxime U. Garcia" Date: Wed, 8 Dec 2021 15:31:27 +0100 Subject: [PATCH] Add new boolean sort_bam as input to be able to choose between sort and view in bwamem and bwamem2mem (#1125) * feat: view is now in args2 so we can use sort * forgot one split_cpus * feat: update with new logic * fix: add more info * fix: remove split_cpus logic --- modules/bwa/mem/main.nf | 4 ++- modules/bwa/mem/meta.yml | 4 +++ modules/bwamem2/mem/main.nf | 4 ++- modules/bwamem2/mem/meta.yml | 10 ++++-- tests/modules/bwa/mem/main.nf | 37 ++++++++++++++++++++-- tests/modules/bwa/mem/nextflow.config | 4 +++ tests/modules/bwa/mem/test.yml | 36 +++++++++++++++++++++ tests/modules/bwamem2/mem/main.nf | 38 +++++++++++++++++++++-- tests/modules/bwamem2/mem/nextflow.config | 4 +++ tests/modules/bwamem2/mem/test.yml | 36 +++++++++++++++++++++ 10 files changed, 168 insertions(+), 9 deletions(-) diff --git a/modules/bwa/mem/main.nf b/modules/bwa/mem/main.nf index 801293a8..9695bd2d 100644 --- a/modules/bwa/mem/main.nf +++ b/modules/bwa/mem/main.nf @@ -10,6 +10,7 @@ process BWA_MEM { input: tuple val(meta), path(reads) path index + val sort_bam output: tuple val(meta), path("*.bam"), emit: bam @@ -20,6 +21,7 @@ process BWA_MEM { def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def read_group = meta.read_group ? "-R ${meta.read_group}" : "" + def samtools_command = sort_bam ? 
'sort' : 'view' """ INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` @@ -29,7 +31,7 @@ process BWA_MEM { -t $task.cpus \\ \$INDEX \\ $reads \\ - | samtools view $args2 -@ $task.cpus -bhS -o ${prefix}.bam - + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/bwa/mem/meta.yml b/modules/bwa/mem/meta.yml index 61eaddef..c7c28f19 100644 --- a/modules/bwa/mem/meta.yml +++ b/modules/bwa/mem/meta.yml @@ -32,6 +32,10 @@ input: type: file description: BWA genome index files pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" output: - bam: type: file diff --git a/modules/bwamem2/mem/main.nf b/modules/bwamem2/mem/main.nf index 81b4b8ab..6d4d8028 100644 --- a/modules/bwamem2/mem/main.nf +++ b/modules/bwamem2/mem/main.nf @@ -10,6 +10,7 @@ process BWAMEM2_MEM { input: tuple val(meta), path(reads) path index + val sort_bam output: tuple val(meta), path("*.bam"), emit: bam @@ -20,6 +21,7 @@ process BWAMEM2_MEM { def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def read_group = meta.read_group ? "-R ${meta.read_group}" : "" + def samtools_command = sort_bam ? 
'sort' : 'view' """ INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` @@ -30,7 +32,7 @@ process BWAMEM2_MEM { -t $task.cpus \\ \$INDEX \\ $reads \\ - | samtools view $args2 -@ $task.cpus -bhS -o ${prefix}.bam - + | samtools $samtools_command $args2 -@ $task.cpus -o ${prefix}.bam - cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/bwamem2/mem/meta.yml b/modules/bwamem2/mem/meta.yml index 58a35e08..71e83759 100644 --- a/modules/bwamem2/mem/meta.yml +++ b/modules/bwamem2/mem/meta.yml @@ -11,9 +11,9 @@ keywords: tools: - bwa: description: | - BWA is a software package for mapping DNA sequences against + BWA-mem2 is a software package for mapping DNA sequences against a large reference genome, such as the human genome. - homepage: http://bio-bwa.sourceforge.net/ + homepage: https://github.com/bwa-mem2/bwa-mem2 documentation: http://www.htslib.org/doc/samtools.html arxiv: arXiv:1303.3997 licence: ['MIT'] @@ -31,7 +31,11 @@ input: - index: type: file description: BWA genome index files - pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + pattern: "Directory containing BWA index *.{0123,amb,ann,bwt.2bit.64,pac}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" output: - bam: type: file diff --git a/tests/modules/bwa/mem/main.nf b/tests/modules/bwa/mem/main.nf index 117cbb4d..c9c57197 100644 --- a/tests/modules/bwa/mem/main.nf +++ b/tests/modules/bwa/mem/main.nf @@ -18,7 +18,23 @@ workflow test_bwa_mem_single_end { fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) BWA_INDEX ( fasta ) - BWA_MEM ( input, BWA_INDEX.out.index ) + BWA_MEM ( input, BWA_INDEX.out.index, false ) +} + +// +// Test with single-end data and sort +// +workflow test_bwa_mem_single_end_sort { + input = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + 
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + BWA_INDEX ( fasta ) + BWA_MEM ( input, BWA_INDEX.out.index, true ) } // @@ -35,5 +51,22 @@ workflow test_bwa_mem_paired_end { fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) BWA_INDEX ( fasta ) - BWA_MEM ( input, BWA_INDEX.out.index ) + BWA_MEM ( input, BWA_INDEX.out.index, false ) +} + +// +// Test with paired-end data and sort +// +workflow test_bwa_mem_paired_end_sort { + input = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + BWA_INDEX ( fasta ) + BWA_MEM ( input, BWA_INDEX.out.index, true ) } diff --git a/tests/modules/bwa/mem/nextflow.config b/tests/modules/bwa/mem/nextflow.config index 8730f1c4..d15f6939 100644 --- a/tests/modules/bwa/mem/nextflow.config +++ b/tests/modules/bwa/mem/nextflow.config @@ -2,4 +2,8 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: BWA_MEM { + ext.args2 = { sort_bam ? 
"" : "-bh" } + } + } diff --git a/tests/modules/bwa/mem/test.yml b/tests/modules/bwa/mem/test.yml index 93535043..8fe2ee6b 100644 --- a/tests/modules/bwa/mem/test.yml +++ b/tests/modules/bwa/mem/test.yml @@ -16,6 +16,24 @@ - path: ./output/bwa/bwa/genome.sa md5sum: ab3952cabf026b48cd3eb5bccbb636d1 +- name: bwa mem single-end sort + command: nextflow run ./tests/modules/bwa/mem -entry test_bwa_mem_single_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/bwa/mem/nextflow.config + tags: + - bwa + - bwa/mem + files: + - path: ./output/bwa/test.bam + - path: ./output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: ./output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: ./output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: ./output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: ./output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 + - name: bwa mem paired-end command: nextflow run ./tests/modules/bwa/mem -entry test_bwa_mem_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bwa/mem/nextflow.config tags: @@ -33,3 +51,21 @@ md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 - path: ./output/bwa/bwa/genome.sa md5sum: ab3952cabf026b48cd3eb5bccbb636d1 + +- name: bwa mem paired-end sort + command: nextflow run ./tests/modules/bwa/mem -entry test_bwa_mem_paired_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/bwa/mem/nextflow.config + tags: + - bwa + - bwa/mem + files: + - path: ./output/bwa/test.bam + - path: ./output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: ./output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: ./output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: ./output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: ./output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 diff --git 
a/tests/modules/bwamem2/mem/main.nf b/tests/modules/bwamem2/mem/main.nf index 2ab557e6..b4293dbe 100644 --- a/tests/modules/bwamem2/mem/main.nf +++ b/tests/modules/bwamem2/mem/main.nf @@ -18,9 +18,26 @@ workflow test_bwamem2_mem_single_end { fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) BWAMEM2_INDEX ( fasta ) - BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index ) + BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index, false ) } +// +// Test with single-end data and sort +// +workflow test_bwamem2_mem_single_end_sort { + input = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + BWAMEM2_INDEX ( fasta ) + BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index, true ) +} + + // // Test with paired-end data // @@ -35,5 +52,22 @@ workflow test_bwamem2_mem_paired_end { fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) BWAMEM2_INDEX ( fasta ) - BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index ) + BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index, false ) +} + +// +// Test with paired-end data and sort +// +workflow test_bwamem2_mem_paired_end_sort { + input = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + BWAMEM2_INDEX ( fasta ) + BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index, true ) } diff --git a/tests/modules/bwamem2/mem/nextflow.config b/tests/modules/bwamem2/mem/nextflow.config index 8730f1c4..b5181865 100644 --- a/tests/modules/bwamem2/mem/nextflow.config +++ b/tests/modules/bwamem2/mem/nextflow.config @@ -2,4 +2,8 @@ process { publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: BWAMEM2_MEM { + ext.args2 = { sort_bam ? "" : "-bh" } + } + } diff --git a/tests/modules/bwamem2/mem/test.yml b/tests/modules/bwamem2/mem/test.yml index c1724bc0..bf445ebe 100644 --- a/tests/modules/bwamem2/mem/test.yml +++ b/tests/modules/bwamem2/mem/test.yml @@ -16,6 +16,24 @@ - path: ./output/bwamem2/bwamem2/genome.fasta.ann md5sum: c32e11f6c859f166c7525a9c1d583567 +- name: bwamem2 mem single-end sort + command: nextflow run ./tests/modules/bwamem2/mem -entry test_bwamem2_mem_single_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/bwamem2/mem/nextflow.config + tags: + - bwamem2 + - bwamem2/mem + files: + - path: ./output/bwamem2/test.bam + - path: ./output/bwamem2/bwamem2/genome.fasta.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: ./output/bwamem2/bwamem2/genome.fasta.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: ./output/bwamem2/bwamem2/genome.fasta.0123 + md5sum: b02870de80106104abcb03cd9463e7d8 + - path: ./output/bwamem2/bwamem2/genome.fasta.bwt.2bit.64 + md5sum: d097a1b82dee375d41a1ea69895a9216 + - path: ./output/bwamem2/bwamem2/genome.fasta.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - name: bwamem2 mem paired-end command: nextflow run ./tests/modules/bwamem2/mem -entry test_bwamem2_mem_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bwamem2/mem/nextflow.config tags: @@ -33,3 +51,21 @@ md5sum: d097a1b82dee375d41a1ea69895a9216 - path: ./output/bwamem2/bwamem2/genome.fasta.ann md5sum: c32e11f6c859f166c7525a9c1d583567 + +- name: bwamem2 mem paired-end sort + command: nextflow run ./tests/modules/bwamem2/mem -entry test_bwamem2_mem_paired_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/bwamem2/mem/nextflow.config + tags: + - bwamem2 + - bwamem2/mem + files: + - path: ./output/bwamem2/test.bam + - path: ./output/bwamem2/bwamem2/genome.fasta.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: 
./output/bwamem2/bwamem2/genome.fasta.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: ./output/bwamem2/bwamem2/genome.fasta.0123 + md5sum: b02870de80106104abcb03cd9463e7d8 + - path: ./output/bwamem2/bwamem2/genome.fasta.bwt.2bit.64 + md5sum: d097a1b82dee375d41a1ea69895a9216 + - path: ./output/bwamem2/bwamem2/genome.fasta.ann + md5sum: c32e11f6c859f166c7525a9c1d583567