From 8656636f0d0a86aa3966052b5c2cd06141647c70 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Wed, 20 Jul 2022 11:22:51 +0200 Subject: [PATCH] update for bcftools merge (#1908) * update for bcftools merge * Update modules/bcftools/merge/main.nf Co-authored-by: Maxime U. Garcia * Update modules/bcftools/merge/main.nf Co-authored-by: Maxime U. Garcia * updated test.yml * added the bed file to the main input tuple * merged all output into one output channel * added a test for bcf.gz output * Update modules/bcftools/merge/main.nf Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> * updated the tests Co-authored-by: Maxime U. Garcia Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- modules/bcftools/convert/meta.yml | 16 +++++- modules/bcftools/merge/main.nf | 23 ++++++-- modules/bcftools/merge/meta.yml | 29 ++++++++++- tests/modules/bcftools/merge/main.nf | 55 +++++++++++++++++++- tests/modules/bcftools/merge/nextflow.config | 10 +++- tests/modules/bcftools/merge/test.yml | 29 ++++++++++- 6 files changed, 149 insertions(+), 13 deletions(-) diff --git a/modules/bcftools/convert/meta.yml b/modules/bcftools/convert/meta.yml index c35b9c94..48cf3a9d 100644 --- a/modules/bcftools/convert/meta.yml +++ b/modules/bcftools/convert/meta.yml @@ -53,10 +53,22 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - vcf_gz: + type: file + description: VCF converted output file (bgzipped) => when `--output-type z` is used + pattern: "*.vcf.gz" - vcf: type: file - description: The converted VCF file - pattern: "*.{bam,cram,sam}" + description: VCF converted output file => when `--output-type v` is used + pattern: "*.vcf" + - bcf_gz: + type: file + description: BCF converted output file (bgzipped) => when `--output-type b` is used + pattern: "*.bcf.gz" + - bcf: + type: file + description: BCF converted output file => when `--output-type u` is used + 
pattern: "*.bcf" authors: - "@nvnieuwk" diff --git a/modules/bcftools/merge/main.nf b/modules/bcftools/merge/main.nf index 082b229c..af586cd1 100644 --- a/modules/bcftools/merge/main.nf +++ b/modules/bcftools/merge/main.nf @@ -9,22 +9,35 @@ process BCFTOOLS_MERGE { input: tuple val(meta), path(vcfs), path(tbis) + path bed + path fasta + path fasta_fai output: - tuple val(meta), path("*.gz"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: merged_variants + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" + + def regions = bed ? "--regions-file $bed" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + "vcf" // no output type in args: bcftools merge writes uncompressed VCF + """ - bcftools merge -Oz \\ - --output ${prefix}.vcf.gz \\ + bcftools merge \\ + $regions \\ + --threads $task.cpus \\ + --output ${prefix}.${extension} \\ $args \\ *.vcf.gz + cat <<-END_VERSIONS > versions.yml "${task.process}": bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') diff --git a/modules/bcftools/merge/meta.yml b/modules/bcftools/merge/meta.yml index 4223fd54..53dc23eb 100644 --- a/modules/bcftools/merge/meta.yml +++ b/modules/bcftools/merge/meta.yml @@ -28,16 +28,40 @@ input: description: | List containing the tbi index files corresponding to the vcfs input files e.g. 
[ 'file1.vcf.tbi', 'file2.vcf.tbi' ] + - bed: + type: file + description: "(Optional) The bed regions to merge on" + pattern: "*.bed" + - fasta: + type: file + description: "(Optional) The fasta reference file (only necessary for the `--gvcf FILE` parameter)" + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: "(Optional) The fasta reference file index (only necessary for the `--gvcf FILE` parameter)" + pattern: "*.fai" output: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - vcf_gz: + type: file + description: VCF merged output file (bgzipped) => when `--output-type z` is used + pattern: "*.vcf.gz" - vcf: type: file - description: VCF merged output file - pattern: "*.{vcf.gz}" + description: VCF merged output file => when `--output-type v` is used + pattern: "*.vcf" + - bcf_gz: + type: file + description: BCF merged output file (bgzipped) => when `--output-type b` is used + pattern: "*.bcf.gz" + - bcf: + type: file + description: BCF merged output file => when `--output-type u` is used + pattern: "*.bcf" - versions: type: file description: File containing software versions @@ -45,3 +69,4 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@nvnieuwk" diff --git a/tests/modules/bcftools/merge/main.nf b/tests/modules/bcftools/merge/main.nf index 119e237a..0ec188d2 100644 --- a/tests/modules/bcftools/merge/main.nf +++ b/tests/modules/bcftools/merge/main.nf @@ -3,7 +3,9 @@ nextflow.enable.dsl = 2 //keep --no-verson argument, otherwise md5 will change on each execution -include { BCFTOOLS_MERGE } from '../../../../modules/bcftools/merge/main.nf' +include { BCFTOOLS_MERGE } from '../../../../modules/bcftools/merge/main.nf' +include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_GVCF } from '../../../../modules/bcftools/merge/main.nf' +include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_BCF } from '../../../../modules/bcftools/merge/main.nf' workflow test_bcftools_merge { input = [ [ id:'test' ], // meta map 
@@ -13,5 +15,54 @@ workflow test_bcftools_merge { file(params.test_data['sarscov2']['illumina']['test3_vcf_gz_tbi'], checkIfExists: true) ] ] - BCFTOOLS_MERGE ( input ) + bed = [] + fasta = [] + fasta_fai = [] + + BCFTOOLS_MERGE ( input, bed, fasta, fasta_fai ) +} + +workflow test_bcftools_merge_bed { + input = [ [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test3_vcf_gz'], checkIfExists: true) ], + [ file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test3_vcf_gz_tbi'], checkIfExists: true) ] + ] + + bed = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + fasta = [] + fasta_fai = [] + + BCFTOOLS_MERGE ( input, bed, fasta, fasta_fai ) +} + +workflow test_bcftools_merge_gvcf { + input = [ [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz_tbi'], checkIfExists: true) ] + ] + + bed = [] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + BCFTOOLS_MERGE_GVCF ( input, bed, fasta, fasta_fai ) +} + +workflow test_bcftools_merge_bcf { + input = [ [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test3_vcf_gz'], checkIfExists: true) ], + [ file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test3_vcf_gz_tbi'], checkIfExists: true) ] + ] + + bed = [] + fasta = [] + fasta_fai = [] + + BCFTOOLS_MERGE_BCF ( input, bed, fasta, fasta_fai ) } diff --git a/tests/modules/bcftools/merge/nextflow.config b/tests/modules/bcftools/merge/nextflow.config index e11e50b6..c2b7972f 100644 --- a/tests/modules/bcftools/merge/nextflow.config +++ b/tests/modules/bcftools/merge/nextflow.config @@ -3,7 +3,15 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } withName: BCFTOOLS_MERGE { - ext.args = '--force-samples --no-version' + ext.args = '--force-samples --no-version --output-type z' + } + + withName: BCFTOOLS_MERGE_GVCF { + ext.args = {"--force-samples --no-version -m none --output-type z --gvcf $fasta"} + } + + withName: BCFTOOLS_MERGE_BCF { + ext.args = "--force-samples --no-version --output-type b" } } diff --git a/tests/modules/bcftools/merge/test.yml b/tests/modules/bcftools/merge/test.yml index cc3621d4..5bf3be4e 100644 --- a/tests/modules/bcftools/merge/test.yml +++ b/tests/modules/bcftools/merge/test.yml @@ -1,8 +1,35 @@ - name: bcftools merge test_bcftools_merge - command: nextflow run ./tests/modules/bcftools/merge -entry test_bcftools_merge -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/merge/nextflow.config + command: nextflow run ./tests/modules/bcftools/merge -entry test_bcftools_merge -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/merge/nextflow.config tags: - bcftools/merge - bcftools files: - path: output/bcftools/test.vcf.gz md5sum: 744b28bef1469aabdcfd354293ef725d + +- name: bcftools merge test_bcftools_merge_bed + command: nextflow run ./tests/modules/bcftools/merge -entry test_bcftools_merge_bed -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/merge/nextflow.config + tags: + - bcftools/merge + - bcftools + files: + - path: output/bcftools/test.vcf.gz + md5sum: 9597e71ae1706f76f00707687be43240 + +- 
name: bcftools merge test_bcftools_merge_gvcf + command: nextflow run ./tests/modules/bcftools/merge -entry test_bcftools_merge_gvcf -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/merge/nextflow.config + tags: + - bcftools/merge + - bcftools + files: + - path: output/bcftools/test.vcf.gz + md5sum: 8aef4d860228cb651e0cb920376f5118 + +- name: bcftools merge test_bcftools_merge_bcf + command: nextflow run ./tests/modules/bcftools/merge -entry test_bcftools_merge_bcf -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/merge/nextflow.config + tags: + - bcftools/merge + - bcftools + files: + - path: output/bcftools/test.bcf.gz + md5sum: 3ca1ab0d9a6ede049c6eeadf88177eb9