Update in GATK4 variantfiltration: Added vcf_index to vcf tuple; output to vcf.gz format. (#1083)

* Added vcf_index to vcf tuple; output to vcf.gz format.

* Fix: extra new line in meta.yml.

* addressed review feedback

* fix: editorconfig error

* fix: gatk memory flag

* fix: editorconfig error

* fix: Indentation 

fix: Indentation

* Fix: lint editorconfig error

Removed one extra space

Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>
This commit is contained in:
praveenraj2018 2021-11-18 21:47:40 +01:00 committed by GitHub
parent f052dc445c
commit 43a1c1c6d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 65 additions and 23 deletions

View file

@ -19,23 +19,29 @@ process GATK4_VARIANTFILTRATION {
} }
input: input:
tuple val(meta), path(vcf) tuple val(meta), path(vcf), path(vcf_tbi)
path fasta path fasta
path fai path fai
path dict path dict
output: output:
tuple val(meta), path("*.vcf"), emit: vcf tuple val(meta), path("*.vcf.gz"), emit: vcf
path "versions.yml" , emit: versions tuple val(meta), path("*.tbi") , emit: tbi
path "versions.yml" , emit: versions
script: script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
// Heap size in GB that is passed to the JVM via `-Xmx${avail_mem}G` on the gatk command line.
// Defaults to 3 when the process has no memory setting (task.memory is unset).
def avail_mem = 3
if (!task.memory) {
    // Tag the message with this module's tool (VariantFiltration) so the log points at the right process.
    log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
    // Use the whole-gigabyte value of the memory assigned to the task.
    avail_mem = task.memory.toGiga()
}
""" """
gatk VariantFiltration \\ gatk --java-options "-Xmx${avail_mem}G" VariantFiltration \\
-R $fasta \\ -R $fasta \\
-V $vcf \\ -V $vcf \\
-O ${prefix}.vcf \\ -O ${prefix}.vcf.gz \\
$options.args $options.args
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml

View file

@ -21,8 +21,12 @@ input:
e.g. [ id:'test'] e.g. [ id:'test']
- vcf: - vcf:
type: list type: list
description: Input VCF file description: List of VCF(.gz) files
pattern: "*.{vcf}" pattern: "*.{vcf,vcf.gz}"
- vcf_tbi:
type: list
description: List of VCF file indexes
pattern: "*.{idx,tbi}"
- fasta: - fasta:
type: file type: file
description: Fasta file of reference genome description: Fasta file of reference genome
@ -38,8 +42,12 @@ input:
output: output:
- vcf: - vcf:
type: file type: file
description: filtered VCF file description: Compressed VCF file
pattern: "*.filtered.{vcf}" pattern: "*.vcf.gz"
- tbi:
type: file
description: Index of VCF file
pattern: "*.vcf.gz.tbi"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions

View file

@ -5,14 +5,32 @@ nextflow.enable.dsl = 2
test_options = ['args': '--filter-name "test_filter" --filter-expression "MQ0 > 0"', 'suffix': '.filtered'] test_options = ['args': '--filter-name "test_filter" --filter-expression "MQ0 > 0"', 'suffix': '.filtered']
include { GATK4_VARIANTFILTRATION } from '../../../../modules/gatk4/variantfiltration/main.nf' addParams( options: test_options ) include { GATK4_VARIANTFILTRATION } from '../../../../modules/gatk4/variantfiltration/main.nf' addParams( options: test_options )
workflow test_gatk4_variantfiltration { // Basic parameters with uncompressed VCF input
workflow test_gatk4_variantfiltration_vcf_input {
input = [ [ id:'test' ], // meta map input = [ [ id:'test' ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
] file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
fai = [ file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
genome_dict = [ file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_VARIANTFILTRATION ( input, fasta, fai, genome_dict ) GATK4_VARIANTFILTRATION ( input, fasta, fastaIndex, fastaDict )
} }
// Basic parameters with compressed VCF input
workflow test_gatk4_variantfiltration_gz_input {
    // Look up the two test-data sections once rather than on every line.
    illumina_data = params.test_data['homo_sapiens']['illumina']
    genome_data   = params.test_data['homo_sapiens']['genome']

    // Input tuple: [ meta map, compressed VCF, its .tbi index ]
    vcf_input = [
        [ id:'test' ],
        file(illumina_data['test_genome_vcf_gz'], checkIfExists: true),
        file(illumina_data['test_genome_vcf_gz_tbi'], checkIfExists: true)
    ]

    // Reference genome files: FASTA, its .fai index and the sequence dictionary.
    reference_fasta = file(genome_data['genome_fasta'], checkIfExists: true)
    reference_fai   = file(genome_data['genome_fasta_fai'], checkIfExists: true)
    reference_dict  = file(genome_data['genome_dict'], checkIfExists: true)

    GATK4_VARIANTFILTRATION ( vcf_input, reference_fasta, reference_fai, reference_dict )
}

View file

@ -1,9 +1,19 @@
- name: gatk4 variantfiltration test_gatk4_variantfiltration - name: gatk4 variantfiltration test_gatk4_variantfiltration_vcf_input
command: nextflow run tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration -c tests/config/nextflow.config command: nextflow run tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration_vcf_input -c tests/config/nextflow.config
tags: tags:
- gatk4/variantfiltration - gatk4/variantfiltration
- gatk4 - gatk4
files: files:
- path: output/gatk4/test.filtered.vcf - path: output/gatk4/test.filtered.vcf.gz
contains: contains: ['BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365']
- "AC=2;AN=2;DP=1;DP4=0,0,1,0;MQ=60;MQ0F=0;SGB=-0.379885" - path: output/gatk4/test.filtered.vcf.gz.tbi
- name: gatk4 variantfiltration test_gatk4_variantfiltration_gz_input
command: nextflow run tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration_gz_input -c tests/config/nextflow.config
tags:
- gatk4/variantfiltration
- gatk4
files:
- path: output/gatk4/test.filtered.vcf.gz
contains: ['BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365']
- path: output/gatk4/test.filtered.vcf.gz.tbi