Add new boolean sort_bam as input to be able to choose between sort and view in bwamem and bwamem2mem (#1125)

* feat: view is now in args2 so we can use sort

* forgot one split_cpus

* feat: update with new logic

* fix: add more info

* fix: remove split_cpus logic
This commit is contained in:
Maxime U. Garcia 2021-12-08 15:31:27 +01:00 committed by GitHub
parent 31d4099f38
commit 1765225042
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 168 additions and 9 deletions

View file

@ -10,6 +10,7 @@ process BWA_MEM {
input: input:
tuple val(meta), path(reads) tuple val(meta), path(reads)
path index path index
val sort_bam
output: output:
tuple val(meta), path("*.bam"), emit: bam tuple val(meta), path("*.bam"), emit: bam
@ -20,6 +21,7 @@ process BWA_MEM {
def args2 = task.ext.args2 ?: '' def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def read_group = meta.read_group ? "-R ${meta.read_group}" : "" def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
def samtools_command = sort_bam ? 'sort' : 'view'
""" """
INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'`
@ -29,7 +31,7 @@ process BWA_MEM {
-t $task.cpus \\ -t $task.cpus \\
\$INDEX \\ \$INDEX \\
$reads \\ $reads \\
| samtools view $args2 -@ $task.cpus -bhS -o ${prefix}.bam - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":

View file

@ -32,6 +32,10 @@ input:
type: file type: file
description: BWA genome index files description: BWA genome index files
pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}"
- sort_bam:
type: boolean
description: use samtools sort (true) or samtools view (false)
pattern: "true or false"
output: output:
- bam: - bam:
type: file type: file

View file

@ -10,6 +10,7 @@ process BWAMEM2_MEM {
input: input:
tuple val(meta), path(reads) tuple val(meta), path(reads)
path index path index
val sort_bam
output: output:
tuple val(meta), path("*.bam"), emit: bam tuple val(meta), path("*.bam"), emit: bam
@ -20,6 +21,7 @@ process BWAMEM2_MEM {
def args2 = task.ext.args2 ?: '' def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def read_group = meta.read_group ? "-R ${meta.read_group}" : "" def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
def samtools_command = sort_bam ? 'sort' : 'view'
""" """
INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'`
@ -30,7 +32,7 @@ process BWAMEM2_MEM {
-t $task.cpus \\ -t $task.cpus \\
\$INDEX \\ \$INDEX \\
$reads \\ $reads \\
| samtools view $args2 -@ $task.cpus -bhS -o ${prefix}.bam - | samtools $samtools_command $args2 -@ $task.cpus -o ${prefix}.bam -
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":

View file

@ -11,9 +11,9 @@ keywords:
tools: tools:
- bwa: - bwa:
description: | description: |
BWA is a software package for mapping DNA sequences against BWA-mem2 is a software package for mapping DNA sequences against
a large reference genome, such as the human genome. a large reference genome, such as the human genome.
homepage: http://bio-bwa.sourceforge.net/ homepage: https://github.com/bwa-mem2/bwa-mem2
documentation: http://www.htslib.org/doc/samtools.html documentation: http://www.htslib.org/doc/samtools.html
arxiv: arXiv:1303.3997 arxiv: arXiv:1303.3997
licence: ['MIT'] licence: ['MIT']
@ -31,7 +31,11 @@ input:
- index: - index:
type: file type: file
description: BWA genome index files description: BWA genome index files
pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" pattern: "Directory containing BWA index *.{0123,amb,ann,bwt.2bit.64,pac}"
- sort_bam:
type: boolean
description: use samtools sort (true) or samtools view (false)
pattern: "true or false"
output: output:
- bam: - bam:
type: file type: file

View file

@ -18,7 +18,23 @@ workflow test_bwa_mem_single_end {
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
BWA_INDEX ( fasta ) BWA_INDEX ( fasta )
BWA_MEM ( input, BWA_INDEX.out.index ) BWA_MEM ( input, BWA_INDEX.out.index, false )
}
//
// Test with single-end data and sort
//
workflow test_bwa_mem_single_end_sort {
input = [
[ id:'test', single_end:true ], // meta map
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
]
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
BWA_INDEX ( fasta )
BWA_MEM ( input, BWA_INDEX.out.index, true )
} }
// //
@ -35,5 +51,22 @@ workflow test_bwa_mem_paired_end {
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
BWA_INDEX ( fasta ) BWA_INDEX ( fasta )
BWA_MEM ( input, BWA_INDEX.out.index ) BWA_MEM ( input, BWA_INDEX.out.index, false )
}
//
// Test with paired-end data and sort
//
workflow test_bwa_mem_paired_end_sort {
input = [
[ id:'test', single_end:false ], // meta map
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
]
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
BWA_INDEX ( fasta )
BWA_MEM ( input, BWA_INDEX.out.index, true )
} }

View file

@ -2,4 +2,8 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName: BWA_MEM {
ext.args2 = { sort_bam ? "" : "-bh" }
}
} }

View file

@ -16,6 +16,24 @@
- path: ./output/bwa/bwa/genome.sa - path: ./output/bwa/bwa/genome.sa
md5sum: ab3952cabf026b48cd3eb5bccbb636d1 md5sum: ab3952cabf026b48cd3eb5bccbb636d1
- name: bwa mem single-end sort
command: nextflow run ./tests/modules/bwa/mem -entry test_bwa_mem_single_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/bwa/mem/nextflow.config
tags:
- bwa
- bwa/mem
files:
- path: ./output/bwa/test.bam
- path: ./output/bwa/bwa/genome.bwt
md5sum: 0469c30a1e239dd08f68afe66fde99da
- path: ./output/bwa/bwa/genome.amb
md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e
- path: ./output/bwa/bwa/genome.ann
md5sum: c32e11f6c859f166c7525a9c1d583567
- path: ./output/bwa/bwa/genome.pac
md5sum: 983e3d2cd6f36e2546e6d25a0da78d66
- path: ./output/bwa/bwa/genome.sa
md5sum: ab3952cabf026b48cd3eb5bccbb636d1
- name: bwa mem paired-end - name: bwa mem paired-end
command: nextflow run ./tests/modules/bwa/mem -entry test_bwa_mem_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bwa/mem/nextflow.config command: nextflow run ./tests/modules/bwa/mem -entry test_bwa_mem_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bwa/mem/nextflow.config
tags: tags:
@ -33,3 +51,21 @@
md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 md5sum: 983e3d2cd6f36e2546e6d25a0da78d66
- path: ./output/bwa/bwa/genome.sa - path: ./output/bwa/bwa/genome.sa
md5sum: ab3952cabf026b48cd3eb5bccbb636d1 md5sum: ab3952cabf026b48cd3eb5bccbb636d1
- name: bwa mem paired-end sort
command: nextflow run ./tests/modules/bwa/mem -entry test_bwa_mem_paired_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/bwa/mem/nextflow.config
tags:
- bwa
- bwa/mem
files:
- path: ./output/bwa/test.bam
- path: ./output/bwa/bwa/genome.bwt
md5sum: 0469c30a1e239dd08f68afe66fde99da
- path: ./output/bwa/bwa/genome.amb
md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e
- path: ./output/bwa/bwa/genome.ann
md5sum: c32e11f6c859f166c7525a9c1d583567
- path: ./output/bwa/bwa/genome.pac
md5sum: 983e3d2cd6f36e2546e6d25a0da78d66
- path: ./output/bwa/bwa/genome.sa
md5sum: ab3952cabf026b48cd3eb5bccbb636d1

View file

@ -18,9 +18,26 @@ workflow test_bwamem2_mem_single_end {
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
BWAMEM2_INDEX ( fasta ) BWAMEM2_INDEX ( fasta )
BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index ) BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index, false )
} }
//
// Test with single-end data and sort
//
workflow test_bwamem2_mem_single_end_sort {
input = [
[ id:'test', single_end:true ], // meta map
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
]
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
BWAMEM2_INDEX ( fasta )
BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index, true )
}
// //
// Test with paired-end data // Test with paired-end data
// //
@ -35,5 +52,22 @@ workflow test_bwamem2_mem_paired_end {
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
BWAMEM2_INDEX ( fasta ) BWAMEM2_INDEX ( fasta )
BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index ) BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index, false )
}
//
// Test with paired-end data and sort
//
workflow test_bwamem2_mem_paired_end_sort {
input = [
[ id:'test', single_end:false ], // meta map
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
]
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
BWAMEM2_INDEX ( fasta )
BWAMEM2_MEM ( input, BWAMEM2_INDEX.out.index, true )
} }

View file

@ -2,4 +2,8 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName: BWAMEM2_MEM {
ext.args2 = { sort_bam ? "" : "-bh" }
}
} }

View file

@ -16,6 +16,24 @@
- path: ./output/bwamem2/bwamem2/genome.fasta.ann - path: ./output/bwamem2/bwamem2/genome.fasta.ann
md5sum: c32e11f6c859f166c7525a9c1d583567 md5sum: c32e11f6c859f166c7525a9c1d583567
- name: bwamem2 mem single-end sort
command: nextflow run ./tests/modules/bwamem2/mem -entry test_bwamem2_mem_single_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/bwamem2/mem/nextflow.config
tags:
- bwamem2
- bwamem2/mem
files:
- path: ./output/bwamem2/test.bam
- path: ./output/bwamem2/bwamem2/genome.fasta.amb
md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e
- path: ./output/bwamem2/bwamem2/genome.fasta.pac
md5sum: 983e3d2cd6f36e2546e6d25a0da78d66
- path: ./output/bwamem2/bwamem2/genome.fasta.0123
md5sum: b02870de80106104abcb03cd9463e7d8
- path: ./output/bwamem2/bwamem2/genome.fasta.bwt.2bit.64
md5sum: d097a1b82dee375d41a1ea69895a9216
- path: ./output/bwamem2/bwamem2/genome.fasta.ann
md5sum: c32e11f6c859f166c7525a9c1d583567
- name: bwamem2 mem paired-end - name: bwamem2 mem paired-end
command: nextflow run ./tests/modules/bwamem2/mem -entry test_bwamem2_mem_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bwamem2/mem/nextflow.config command: nextflow run ./tests/modules/bwamem2/mem -entry test_bwamem2_mem_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bwamem2/mem/nextflow.config
tags: tags:
@ -33,3 +51,21 @@
md5sum: d097a1b82dee375d41a1ea69895a9216 md5sum: d097a1b82dee375d41a1ea69895a9216
- path: ./output/bwamem2/bwamem2/genome.fasta.ann - path: ./output/bwamem2/bwamem2/genome.fasta.ann
md5sum: c32e11f6c859f166c7525a9c1d583567 md5sum: c32e11f6c859f166c7525a9c1d583567
- name: bwamem2 mem paired-end sort
command: nextflow run ./tests/modules/bwamem2/mem -entry test_bwamem2_mem_paired_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/bwamem2/mem/nextflow.config
tags:
- bwamem2
- bwamem2/mem
files:
- path: ./output/bwamem2/test.bam
- path: ./output/bwamem2/bwamem2/genome.fasta.amb
md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e
- path: ./output/bwamem2/bwamem2/genome.fasta.pac
md5sum: 983e3d2cd6f36e2546e6d25a0da78d66
- path: ./output/bwamem2/bwamem2/genome.fasta.0123
md5sum: b02870de80106104abcb03cd9463e7d8
- path: ./output/bwamem2/bwamem2/genome.fasta.bwt.2bit.64
md5sum: d097a1b82dee375d41a1ea69895a9216
- path: ./output/bwamem2/bwamem2/genome.fasta.ann
md5sum: c32e11f6c859f166c7525a9c1d583567