Gatk4 selectvariants (#1346)

* initial commit

* tested

Co-authored-by: Cipriano <rrn8@cdc.gov>
Co-authored-by: Sateesh <33637490+sateeshperi@users.noreply.github.com>
Co-authored-by: Maxime U. Garcia <max.u.garcia@gmail.com>
This commit is contained in:
Michael J Cipriano 2022-02-24 03:51:48 -05:00 committed by GitHub
parent ee915a43e4
commit 6400317623
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 155 additions and 0 deletions

View file

@ -0,0 +1,41 @@
// Runs `gatk SelectVariants` on one VCF and emits the resulting bgzipped VCF
// together with the tabix index written next to it.
//
// Input : tuple(meta, vcf, vcf_idx) - meta.id is used for the task tag and as
//         the default output prefix; vcf_idx is staged alongside the VCF but is
//         not referenced on the command line.
// Output: vcf      - tuple(meta, *.selectvariants.vcf.gz)
//         tbi      - tuple(meta, *.selectvariants.vcf.gz.tbi)
//         versions - versions.yml recording the GATK version string
process GATK4_SELECTVARIANTS {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.2.5.0--hdfd78af_0':
        'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"

    input:
    tuple val(meta), path(vcf), path(vcf_idx)

    output:
    tuple val(meta), path("*.selectvariants.vcf.gz")    , emit: vcf
    tuple val(meta), path("*.selectvariants.vcf.gz.tbi"), emit: tbi
    path "versions.yml"                                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output prefix come from task.ext.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // JVM heap size in GB: 3 unless the task declares a memory requirement.
    def avail_mem = 3
    if (!task.memory) {
        // The tag names this tool (it previously said "VariantFiltration").
        log.info '[GATK SelectVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.toGiga()
    }
    """
    gatk --java-options "-Xmx${avail_mem}G" SelectVariants \\
        -V $vcf \\
        -O ${prefix}.selectvariants.vcf.gz \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,55 @@
# Module metadata for GATK4_SELECTVARIANTS. Input and output entries mirror
# the channels declared by the process in main.nf.
name: gatk4_selectvariants
description: Select a subset of variants from a VCF file
keywords:
  - gatk
  - gatk4
  - selectvariants
  - vcf
tools:
  - gatk4:
      description: |
        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
        with a primary focus on variant discovery and genotyping. Its powerful processing engine
        and high-performance computing features make it capable of taking on projects of any size.
      homepage: https://gatk.broadinstitute.org/hc/en-us
      documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036362532-SelectVariants
      tool_dev_url: https://github.com/broadinstitute/gatk
      doi: 10.1158/1538-7445.AM2017-3590
      licence: ["Apache-2.0"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  # Each of the two entries below is a single staged path, hence `file`.
  - vcf:
      type: file
      description: VCF(.gz) file
      pattern: "*.{vcf,vcf.gz}"
  - vcf_idx:
      type: file
      description: VCF file index
      pattern: "*.{idx,tbi}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - vcf:
      type: file
      description: Compressed VCF file
      pattern: "*.selectvariants.vcf.gz"
  # Named after the `emit: tbi` channel of the process.
  - tbi:
      type: file
      description: Tabix index of the compressed VCF file
      pattern: "*.selectvariants.vcf.gz.tbi"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@mjcipriano"

View file

@ -672,6 +672,10 @@ gatk4/samtofastq:
- modules/gatk4/samtofastq/**
- tests/modules/gatk4/samtofastq/**
# gatk4/selectvariants: globs covering the module sources and its tests.
# NOTE(review): presumably used to decide which module tests run when these
# paths change - confirm against the CI configuration.
gatk4/selectvariants:
- modules/gatk4/selectvariants/**
- tests/modules/gatk4/selectvariants/**
gatk4/splitncigarreads:
- modules/gatk4/splitncigarreads/**
- tests/modules/gatk4/splitncigarreads/**

View file

@ -0,0 +1,29 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_SELECTVARIANTS } from '../../../../modules/gatk4/selectvariants/main.nf'
// Default options, fed with a plain-text VCF and its index
workflow test_gatk4_selectvariants_vcf_input {

    // Shortcut into the test-data registry entry the fixtures are read from.
    illumina = params.test_data['homo_sapiens']['illumina']

    // Tuple layout expected by the process: [ meta, vcf, vcf_idx ]
    sample = [
        [ id:'test' ],
        file(illumina['test_genome_vcf'], checkIfExists: true),
        file(illumina['test_genome_vcf_idx'], checkIfExists: true)
    ]

    GATK4_SELECTVARIANTS ( sample )
}
// Default options, fed with a gzipped VCF and its tabix index
workflow test_gatk4_selectvariants_gz_input {

    // Shortcut into the test-data registry entry the fixtures are read from.
    illumina = params.test_data['homo_sapiens']['illumina']

    // Tuple layout expected by the process: [ meta, vcf, vcf_idx ]
    sample = [
        [ id:'test' ],
        file(illumina['test_genome_vcf_gz'], checkIfExists: true),
        file(illumina['test_genome_vcf_gz_tbi'], checkIfExists: true)
    ]

    GATK4_SELECTVARIANTS ( sample )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish into a sub-directory of params.outdir named after the process:
    // take the last ':'-separated segment of task.process, keep the part
    // before its first '_', and lower-case it.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}

View file

@ -0,0 +1,21 @@
# Uncompressed-VCF entry point; only versions.yml is checked by checksum.
- name: gatk4 selectvariants test_gatk4_selectvariants_vcf_input
  command: >-
    nextflow run tests/modules/gatk4/selectvariants
    -entry test_gatk4_selectvariants_vcf_input
    -c tests/config/nextflow.config
  tags:
    - gatk4/selectvariants
    - gatk4
  files:
    - path: output/gatk4/test.selectvariants.vcf.gz
    - path: output/gatk4/test.selectvariants.vcf.gz.tbi
    - path: output/gatk4/versions.yml
      md5sum: "a35d78af179f43652274bc7405d5a785"
# Gzipped-VCF entry point; only versions.yml is checked by checksum.
- name: gatk4 selectvariants test_gatk4_selectvariants_gz_input
  command: >-
    nextflow run tests/modules/gatk4/selectvariants
    -entry test_gatk4_selectvariants_gz_input
    -c tests/config/nextflow.config
  tags:
    - gatk4/selectvariants
    - gatk4
  files:
    - path: output/gatk4/test.selectvariants.vcf.gz
    - path: output/gatk4/test.selectvariants.vcf.gz.tbi
    - path: output/gatk4/versions.yml
      md5sum: "c943f3579a369968ca63444eb43fb6e7"