From 43a1c1c6d18d7d9395846503046f1649ba72a06e Mon Sep 17 00:00:00 2001 From: praveenraj2018 <43108054+praveenraj2018@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:47:40 +0100 Subject: [PATCH] Update in GATK4 variantfiltration: Added vcf_index to vcf tuple; output to vcf.gz format. (#1083) * Added vcf_index to vcf tuple; output to vcf.gz format. * Fix: extra new line in meta.yml. * addressed review feedback * fix: editorconfig error * fix: gatk memory flag * fix: editorconfig error * fix: Indentation fix: Indentation * Fix: lint editorconfig error Removed one extra space Co-authored-by: Robert A. Petit III --- modules/gatk4/variantfiltration/main.nf | 20 ++++++++---- modules/gatk4/variantfiltration/meta.yml | 16 +++++++--- tests/modules/gatk4/variantfiltration/main.nf | 32 +++++++++++++++---- .../modules/gatk4/variantfiltration/test.yml | 20 +++++++++--- 4 files changed, 65 insertions(+), 23 deletions(-) diff --git a/modules/gatk4/variantfiltration/main.nf b/modules/gatk4/variantfiltration/main.nf index a4e950ae..e0f0727a 100644 --- a/modules/gatk4/variantfiltration/main.nf +++ b/modules/gatk4/variantfiltration/main.nf @@ -19,23 +19,29 @@ process GATK4_VARIANTFILTRATION { } input: - tuple val(meta), path(vcf) + tuple val(meta), path(vcf), path(vcf_tbi) path fasta path fai path dict output: - tuple val(meta), path("*.vcf"), emit: vcf - path "versions.yml" , emit: versions - + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions script: - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.toGiga() + } """ - gatk VariantFiltration \\ + gatk --java-options "-Xmx${avail_mem}G" VariantFiltration \\ -R $fasta \\ -V $vcf \\ - -O ${prefix}.vcf \\ + -O ${prefix}.vcf.gz \\ $options.args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/variantfiltration/meta.yml b/modules/gatk4/variantfiltration/meta.yml index 6d4983a6..71f0b8b2 100644 --- a/modules/gatk4/variantfiltration/meta.yml +++ b/modules/gatk4/variantfiltration/meta.yml @@ -21,8 +21,12 @@ input: e.g. [ id:'test'] - vcf: type: list - description: Input VCF file - pattern: "*.{vcf}" + description: List of VCF(.gz) files + pattern: "*.{vcf,vcf.gz}" + - vcf_tbi: + type: list + description: List of VCF file indexes + pattern: "*.{idx,tbi}" - fasta: type: file description: Fasta file of reference genome @@ -38,8 +42,12 @@ input: output: - vcf: type: file - description: filtered VCF file - pattern: "*.filtered.{vcf}" + description: Compressed VCF file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" - versions: type: file description: File containing software versions diff --git a/tests/modules/gatk4/variantfiltration/main.nf b/tests/modules/gatk4/variantfiltration/main.nf index 04bebf6f..67c9daec 100644 --- a/tests/modules/gatk4/variantfiltration/main.nf +++ b/tests/modules/gatk4/variantfiltration/main.nf @@ -5,14 +5,32 @@ nextflow.enable.dsl = 2 test_options = ['args': '--filter-name "test_filter" --filter-expression "MQ0 > 0"', 'suffix': '.filtered'] include { GATK4_VARIANTFILTRATION } from '../../../../modules/gatk4/variantfiltration/main.nf' addParams( options: test_options ) -workflow test_gatk4_variantfiltration { +// Basic parameters with uncompressed VCF input +workflow test_gatk4_variantfiltration_vcf_input { + input = [ [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] - ] - fasta = [ 
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] - fai = [ file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] - genome_dict = [ file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true) ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_VARIANTFILTRATION ( input, fasta, fai, genome_dict ) + GATK4_VARIANTFILTRATION ( input, fasta, fastaIndex, fastaDict ) } + +// Basic parameters with compressed VCF input +workflow test_gatk4_variantfiltration_gz_input { + + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_VARIANTFILTRATION ( input, fasta, fastaIndex, fastaDict ) +} + + diff --git a/tests/modules/gatk4/variantfiltration/test.yml b/tests/modules/gatk4/variantfiltration/test.yml index 1a2bf6d2..e3177cfc 100644 --- a/tests/modules/gatk4/variantfiltration/test.yml +++ b/tests/modules/gatk4/variantfiltration/test.yml @@ -1,9 +1,19 @@ -- name: gatk4 variantfiltration test_gatk4_variantfiltration - command: nextflow run 
tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration -c tests/config/nextflow.config +- name: gatk4 variantfiltration test_gatk4_variantfiltration_vcf_input + command: nextflow run tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration_vcf_input -c tests/config/nextflow.config tags: - gatk4/variantfiltration - gatk4 files: - - path: output/gatk4/test.filtered.vcf - contains: - - "AC=2;AN=2;DP=1;DP4=0,0,1,0;MQ=60;MQ0F=0;SGB=-0.379885" + - path: output/gatk4/test.filtered.vcf.gz + contains: ['BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365'] + - path: output/gatk4/test.filtered.vcf.gz.tbi + +- name: gatk4 variantfiltration test_gatk4_variantfiltration_gz_input + command: nextflow run tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration_gz_input -c tests/config/nextflow.config + tags: + - gatk4/variantfiltration + - gatk4 + files: + - path: output/gatk4/test.filtered.vcf.gz + contains: ['BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365'] + - path: output/gatk4/test.filtered.vcf.gz.tbi