Update in GATK4 variantfiltration: Added vcf_index to vcf tuple; output to vcf.gz format. (#1083)

* Added vcf_index to vcf tuple; output to vcf.gz format.

* Fix: extra new line in meta.yml.

* addressed review feedback

* fix: editorconfig error

* fix: gatk memory flag

* fix: editorconfig error

* fix: Indentation 

fix: Indentation

* Fix: lint editorconfig error

Removed one extra space

Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>
This commit is contained in:
praveenraj2018 2021-11-18 21:47:40 +01:00 committed by GitHub
parent f052dc445c
commit 43a1c1c6d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 65 additions and 23 deletions

View file

@ -19,23 +19,29 @@ process GATK4_VARIANTFILTRATION {
}
input:
tuple val(meta), path(vcf)
tuple val(meta), path(vcf), path(vcf_tbi)
path fasta
path fai
path dict
output:
tuple val(meta), path("*.vcf"), emit: vcf
tuple val(meta), path("*.vcf.gz"), emit: vcf
tuple val(meta), path("*.tbi") , emit: tbi
path "versions.yml" , emit: versions
script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
// JVM heap size in GB, interpolated into the `-Xmx${avail_mem}G` java option of the gatk call.
// Falls back to 3 when the process has no memory directive (task.memory is unset).
def avail_mem = 3
if (!task.memory) {
// Log tag names this module's tool (VariantFiltration) so the message can be traced to the right process.
log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.toGiga()
}
"""
gatk VariantFiltration \\
gatk --java-options "-Xmx${avail_mem}G" VariantFiltration \\
-R $fasta \\
-V $vcf \\
-O ${prefix}.vcf \\
-O ${prefix}.vcf.gz \\
$options.args
cat <<-END_VERSIONS > versions.yml

View file

@ -21,8 +21,12 @@ input:
e.g. [ id:'test']
- vcf:
type: list
description: Input VCF file
pattern: "*.{vcf}"
description: List of VCF(.gz) files
pattern: "*.{vcf,vcf.gz}"
- vcf_tbi:
type: list
description: List of VCF file indexes
pattern: "*.{idx,tbi}"
- fasta:
type: file
description: Fasta file of reference genome
@ -38,8 +42,12 @@ input:
output:
- vcf:
type: file
description: filtered VCF file
pattern: "*.filtered.{vcf}"
description: Compressed VCF file
pattern: "*.vcf.gz"
- tbi:
type: file
description: Index of VCF file
pattern: "*.vcf.gz.tbi"
- versions:
type: file
description: File containing software versions

View file

@ -5,14 +5,32 @@ nextflow.enable.dsl = 2
test_options = ['args': '--filter-name "test_filter" --filter-expression "MQ0 > 0"', 'suffix': '.filtered']
include { GATK4_VARIANTFILTRATION } from '../../../../modules/gatk4/variantfiltration/main.nf' addParams( options: test_options )
workflow test_gatk4_variantfiltration {
// Basic parameters with uncompressed VCF input
workflow test_gatk4_variantfiltration_vcf_input {
input = [ [ id:'test' ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ]
]
fasta = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
fai = [ file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
genome_dict = [ file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true) ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_VARIANTFILTRATION ( input, fasta, fai, genome_dict )
GATK4_VARIANTFILTRATION ( input, fasta, fastaIndex, fastaDict )
}
// Basic parameters with compressed VCF input
workflow test_gatk4_variantfiltration_gz_input {
// Test entry point: feeds the module the 'test_genome_vcf_gz' VCF together with its
// 'test_genome_vcf_gz_tbi' index from the homo_sapiens test data set.
// Tuple layout is [ meta, vcf, vcf index ].
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ]
// Reference genome inputs: FASTA, its .fai index, and the sequence dictionary.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
// Argument order: input tuple, fasta, fasta index, dict.
GATK4_VARIANTFILTRATION ( input, fasta, fastaIndex, fastaDict )
}

View file

@ -1,9 +1,19 @@
- name: gatk4 variantfiltration test_gatk4_variantfiltration
command: nextflow run tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration -c tests/config/nextflow.config
- name: gatk4 variantfiltration test_gatk4_variantfiltration_vcf_input
command: nextflow run tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration_vcf_input -c tests/config/nextflow.config
tags:
- gatk4/variantfiltration
- gatk4
files:
- path: output/gatk4/test.filtered.vcf
contains:
- "AC=2;AN=2;DP=1;DP4=0,0,1,0;MQ=60;MQ0F=0;SGB=-0.379885"
- path: output/gatk4/test.filtered.vcf.gz
contains: ['BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365']
- path: output/gatk4/test.filtered.vcf.gz.tbi
- name: gatk4 variantfiltration test_gatk4_variantfiltration_gz_input
command: nextflow run tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration_gz_input -c tests/config/nextflow.config
tags:
- gatk4/variantfiltration
- gatk4
files:
- path: output/gatk4/test.filtered.vcf.gz
contains: ['BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365']
- path: output/gatk4/test.filtered.vcf.gz.tbi