update for bcftools merge (#1908)

* update for bcftools merge

* Update modules/bcftools/merge/main.nf

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>

* Update modules/bcftools/merge/main.nf

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>

* updated test.yml

* added the bed file to the main input tuple

* merged all output into one output channel

* added a test for bcf.gz output

* Update modules/bcftools/merge/main.nf

Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>

* updated the tests

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>
Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
This commit is contained in:
nvnieuwk 2022-07-20 11:22:51 +02:00 committed by GitHub
parent 8214b73700
commit 8656636f0d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 149 additions and 13 deletions

View file

@ -53,10 +53,22 @@ output:
type: file type: file
description: File containing software versions description: File containing software versions
pattern: "versions.yml" pattern: "versions.yml"
- vcf_gz:
type: file
description: VCF merged output file (bgzipped) => when `--output-type z` is used
pattern: "*.vcf.gz"
- vcf: - vcf:
type: file type: file
description: The converted VCF file description: VCF merged output file => when `--output-type v` is used
pattern: "*.{bam,cram,sam}" pattern: "*.vcf"
- bcf_gz:
type: file
description: BCF merged output file (bgzipped) => when `--output-type b` is used
pattern: "*.bcf.gz"
- bcf:
type: file
description: BCF merged output file => when `--output-type u` is used
pattern: "*.bcf"
authors: authors:
- "@nvnieuwk" - "@nvnieuwk"

View file

@ -9,9 +9,12 @@ process BCFTOOLS_MERGE {
input: input:
tuple val(meta), path(vcfs), path(tbis) tuple val(meta), path(vcfs), path(tbis)
path bed
path fasta
path fasta_fai
output: output:
tuple val(meta), path("*.gz"), emit: vcf tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: merged_variants
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
@ -19,12 +22,22 @@ process BCFTOOLS_MERGE {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def regions = bed ? "--regions-file $bed" : ""
def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
"vcf.gz"
""" """
bcftools merge -Oz \\ bcftools merge \\
--output ${prefix}.vcf.gz \\ $regions \\
--threads $task.cpus \\
--output ${prefix}.${extension} \\
$args \\ $args \\
*.vcf.gz *.vcf.gz
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')

View file

@ -28,16 +28,40 @@ input:
description: | description: |
List containing the tbi index files corresponding to the vcfs input files List containing the tbi index files corresponding to the vcfs input files
e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ] e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ]
- bed:
type: file
description: "(Optional) The bed regions to merge on"
pattern: "*.bed"
- fasta:
type: file
description: "(Optional) The fasta reference file (only necessary for the `--gvcf FILE` parameter)"
pattern: "*.{fasta,fa}"
- fasta_fai:
type: file
description: "(Optional) The fasta reference file index (only necessary for the `--gvcf FILE` parameter)"
pattern: "*.fai"
output: output:
- meta: - meta:
type: map type: map
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- vcf_gz:
type: file
description: VCF merged output file (bgzipped) => when `--output-type z` is used
pattern: "*.vcf.gz"
- vcf: - vcf:
type: file type: file
description: VCF merged output file description: VCF merged output file => when `--output-type v` is used
pattern: "*.{vcf.gz}" pattern: "*.vcf"
- bcf_gz:
type: file
description: BCF merged output file (bgzipped) => when `--output-type b` is used
pattern: "*.bcf.gz"
- bcf:
type: file
description: BCF merged output file => when `--output-type u` is used
pattern: "*.bcf"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions
@ -45,3 +69,4 @@ output:
authors: authors:
- "@joseespinosa" - "@joseespinosa"
- "@drpatelh" - "@drpatelh"
- "@nvnieuwk"

View file

@ -4,6 +4,8 @@ nextflow.enable.dsl = 2
//keep --no-version argument, otherwise md5 will change on each execution //keep --no-version argument, otherwise md5 will change on each execution
include { BCFTOOLS_MERGE } from '../../../../modules/bcftools/merge/main.nf' include { BCFTOOLS_MERGE } from '../../../../modules/bcftools/merge/main.nf'
include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_GVCF } from '../../../../modules/bcftools/merge/main.nf'
include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_BCF } from '../../../../modules/bcftools/merge/main.nf'
workflow test_bcftools_merge { workflow test_bcftools_merge {
input = [ [ id:'test' ], // meta map input = [ [ id:'test' ], // meta map
@ -13,5 +15,54 @@ workflow test_bcftools_merge {
file(params.test_data['sarscov2']['illumina']['test3_vcf_gz_tbi'], checkIfExists: true) ] file(params.test_data['sarscov2']['illumina']['test3_vcf_gz_tbi'], checkIfExists: true) ]
] ]
BCFTOOLS_MERGE ( input ) bed = []
fasta = []
fasta_fai = []
BCFTOOLS_MERGE ( input, bed, fasta, fasta_fai )
}
workflow test_bcftools_merge_bed {
input = [ [ id:'test' ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test3_vcf_gz'], checkIfExists: true) ],
[ file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test3_vcf_gz_tbi'], checkIfExists: true) ]
]
bed = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
fasta = []
fasta_fai = []
BCFTOOLS_MERGE ( input, bed, fasta, fasta_fai )
}
workflow test_bcftools_merge_gvcf {
input = [ [ id:'test' ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ],
[ file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz_tbi'], checkIfExists: true) ]
]
bed = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
BCFTOOLS_MERGE_GVCF ( input, bed, fasta, fasta_fai )
}
workflow test_bcftools_merge_bcf {
input = [ [ id:'test' ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test3_vcf_gz'], checkIfExists: true) ],
[ file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test3_vcf_gz_tbi'], checkIfExists: true) ]
]
bed = []
fasta = []
fasta_fai = []
BCFTOOLS_MERGE_BCF ( input, bed, fasta, fasta_fai )
} }

View file

@ -3,7 +3,15 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName: BCFTOOLS_MERGE { withName: BCFTOOLS_MERGE {
ext.args = '--force-samples --no-version' ext.args = '--force-samples --no-version --output-type z'
}
withName: BCFTOOLS_MERGE_GVCF {
ext.args = {"--force-samples --no-version -m none --output-type z --gvcf $fasta"}
}
withName: BCFTOOLS_MERGE_BCF {
ext.args = "--force-samples --no-version --output-type b"
} }
} }

View file

@ -6,3 +6,30 @@
files: files:
- path: output/bcftools/test.vcf.gz - path: output/bcftools/test.vcf.gz
md5sum: 744b28bef1469aabdcfd354293ef725d md5sum: 744b28bef1469aabdcfd354293ef725d
- name: bcftools merge test_bcftools_merge_bed
command: nextflow run ./tests/modules/bcftools/merge -entry test_bcftools_merge_bed -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/merge/nextflow.config
tags:
- bcftools/merge
- bcftools
files:
- path: output/bcftools/test.vcf.gz
md5sum: 9597e71ae1706f76f00707687be43240
- name: bcftools merge test_bcftools_merge_gvcf
command: nextflow run ./tests/modules/bcftools/merge -entry test_bcftools_merge_gvcf -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/merge/nextflow.config
tags:
- bcftools/merge
- bcftools
files:
- path: output/bcftools/test.vcf.gz
md5sum: 8aef4d860228cb651e0cb920376f5118
- name: bcftools merge test_bcftools_merge_bcf
command: nextflow run ./tests/modules/bcftools/merge -entry test_bcftools_merge_bcf -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/merge/nextflow.config
tags:
- bcftools/merge
- bcftools
files:
- path: output/bcftools/test.bcf.gz
md5sum: 3ca1ab0d9a6ede049c6eeadf88177eb9