new module gatk4/reblockgvcf

This commit is contained in:
Nicolas Vannieuwkerke 2022-06-09 14:56:55 +02:00
parent 438f6c5281
commit fe9e4ece00
6 changed files with 222 additions and 0 deletions

View file

@ -0,0 +1,52 @@
// Runs `gatk ReblockGVCF` on one GVCF per sample and writes
// `<prefix>.reblock.g.vcf.gz` plus the tool version to `versions.yml`.
process GATK4_REBLOCKGVCF {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
        'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"

    input:
    // `tbi`, `fai`, `dict` and `dbsnp_tbi` are never referenced in the script;
    // presumably they are staged so GATK finds them next to their parent files.
    // `intervals`, `dbsnp` and `dbsnp_tbi` may be passed as `[]` to disable them.
    tuple val(meta), path(gvcf), path(tbi), path(intervals)
    path fasta
    path fai
    path dict
    path dbsnp
    path dbsnp_tbi

    output:
    // NOTE(review): this channel carries the reblocked GVCF and its index but is
    // emitted as `bam`. Renaming it would change the module interface, so it is
    // left untouched here - confirm the intended channel name.
    tuple val(meta), path("*.reblock.g.vcf.gz"), path("*.tbi"), emit: bam
    path "versions.yml"                                       , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Optional flags collapse to an empty string when the input is empty/falsy.
    def dbsnp_opt     = dbsnp ? "--dbsnp $dbsnp" : ""
    def intervals_opt = intervals ? "--intervals $intervals" : ""

    // Java heap size in GB: taken from the task memory directive when it is set,
    // otherwise the 3 GB fallback is used and reported in the log.
    def heap_gb = 3
    if (task.memory) {
        heap_gb = task.memory.giga
    } else {
        log.info '[GATK ReblockGVCF] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    }
    """
    gatk --java-options "-Xmx${heap_gb}g" ReblockGVCF \\
        --variant $gvcf \\
        --output ${prefix}.reblock.g.vcf.gz \\
        --reference $fasta \\
        $dbsnp_opt \\
        $intervals_opt \\
        --tmp-dir . \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,74 @@
# Module metadata for gatk4/reblockgvcf: describes the tool and every
# input/output file handled by the GATK4_REBLOCKGVCF process.
name: "gatk4_reblockgvcf"
description: Condenses homRef blocks in a single-sample GVCF
keywords:
  - gatk4
  - reblockgvcf
  - gvcf
tools:
  - gatk4:
      description: |
        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
        with a primary focus on variant discovery and genotyping. Its powerful processing engine
        and high-performance computing features make it capable of taking on projects of any size.
      homepage: https://gatk.broadinstitute.org/hc/en-us
      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
      doi: 10.1158/1538-7445.AM2017-3590
      licence: ["Apache-2.0"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - gvcf:
      type: file
      description: GVCF file created using HaplotypeCaller using the '-ERC GVCF' or '-ERC BP_RESOLUTION' mode
      pattern: "*.{vcf,gvcf}.gz"
  - tbi:
      type: file
      description: Index of the GVCF file
      pattern: "*.tbi"
  - intervals:
      type: file
      description: Bed file with the genomic regions included in the library (optional)
  - fasta:
      type: file
      description: The reference fasta file
      pattern: "*.fasta"
  - fai:
      type: file
      description: Index of reference fasta file
      # Glob rather than a literal file name, so any "<reference>.fai" matches.
      pattern: "*.fai"
  - dict:
      type: file
      description: GATK sequence dictionary
      pattern: "*.dict"
  - dbsnp:
      type: file
      description: VCF file containing known sites (optional)
  - dbsnp_tbi:
      type: file
      description: VCF index of dbsnp (optional)

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # The GVCF and its index are emitted together in one tuple with `meta`.
  - gvcf:
      type: file
      description: Reblocked GVCF (homRef blocks condensed by ReblockGVCF)
      pattern: "*reblock.g.vcf.gz"
  - tbi:
      type: file
      description: Index of the reblocked GVCF
      pattern: "*reblock.g.vcf.gz.tbi"

authors:
  - "@nvnieuwk"

View file

@ -859,6 +859,10 @@ gatk4/mutect2:
- modules/gatk4/mutect2/**
- tests/modules/gatk4/mutect2/**
gatk4/reblockgvcf:
- modules/gatk4/reblockgvcf/**
- tests/modules/gatk4/reblockgvcf/**
gatk4/revertsam:
- modules/gatk4/revertsam/**
- tests/modules/gatk4/revertsam/**

View file

@ -0,0 +1,55 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_REBLOCKGVCF } from '../../../../modules/gatk4/reblockgvcf/main.nf'
// Baseline case: sarscov2 GVCF, no intervals and no dbSNP resource.
workflow test_gatk4_reblockgvcf {

    def illumina = params.test_data['sarscov2']['illumina']
    def genome   = params.test_data['sarscov2']['genome']

    // [ meta, gvcf, tbi, intervals ] - the empty list disables --intervals
    gvcf_input = [
        [ id:'test', single_end:false ],
        file(illumina['test_vcf_gz'], checkIfExists: true),
        file(illumina['test_vcf_gz_tbi'], checkIfExists: true),
        []
    ]

    reference       = file(genome['genome_fasta'], checkIfExists: true)
    reference_index = file(genome['genome_fasta_fai'], checkIfExists: true)
    sequence_dict   = file(genome['genome_dict'], checkIfExists: true)

    // The two trailing empty lists stand in for dbsnp and dbsnp_tbi.
    GATK4_REBLOCKGVCF ( gvcf_input, reference, reference_index, sequence_dict, [], [] )
}
// Same sarscov2 input as the baseline case, restricted to a BED interval file.
workflow test_gatk4_reblockgvcf_intervals {

    def illumina = params.test_data['sarscov2']['illumina']
    def genome   = params.test_data['sarscov2']['genome']

    // [ meta, gvcf, tbi, intervals ]
    gvcf_input = [
        [ id:'test', single_end:false ],
        file(illumina['test_vcf_gz'], checkIfExists: true),
        file(illumina['test_vcf_gz_tbi'], checkIfExists: true),
        file(genome['test_bed'], checkIfExists: true)
    ]

    reference       = file(genome['genome_fasta'], checkIfExists: true)
    reference_index = file(genome['genome_fasta_fai'], checkIfExists: true)
    sequence_dict   = file(genome['genome_dict'], checkIfExists: true)

    // The two trailing empty lists stand in for dbsnp and dbsnp_tbi.
    GATK4_REBLOCKGVCF ( gvcf_input, reference, reference_index, sequence_dict, [], [] )
}
// Human test data with a dbSNP VCF and its index supplied; no intervals.
workflow test_gatk4_reblockgvcf_dbsnp {

    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    // [ meta, gvcf, tbi, intervals ] - the empty list disables --intervals
    gvcf_input = [
        [ id:'test', single_end:false ],
        file(illumina['test_haplotc_cnn_vcf_gz'], checkIfExists: true),
        file(illumina['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true),
        []
    ]

    reference       = file(genome['genome_fasta'], checkIfExists: true)
    reference_index = file(genome['genome_fasta_fai'], checkIfExists: true)
    sequence_dict   = file(genome['genome_dict'], checkIfExists: true)

    known_sites       = file(genome['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
    known_sites_index = file(genome['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)

    GATK4_REBLOCKGVCF ( gvcf_input, reference, reference_index, sequence_dict, known_sites, known_sites_index )
}

View file

@ -0,0 +1,5 @@
process {
    // Publish each task's outputs under "<outdir>/<tool>", where <tool> is the
    // lower-cased text before the first underscore of the last ':'-separated
    // segment of the process name (GATK4_REBLOCKGVCF -> gatk4).
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }
}

View file

@ -0,0 +1,32 @@
# pytest-workflow cases for gatk4/reblockgvcf.
# The md5sum of the reblocked GVCF was reported as variable, so that file is
# checked with `contains` (a list of strings) instead; the index keeps its md5sum.
# NOTE(review): "##fileformat=VCF" is the mandatory first VCF header line, which
# makes it a safe but weak check - replace it with a more specific record once
# the expected output has been inspected.
- name: gatk4 reblockgvcf test_gatk4_reblockgvcf
  command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
  tags:
    - gatk4/reblockgvcf
    - gatk4
  files:
    - path: output/gatk4/test.reblock.g.vcf.gz
      contains:
        - "##fileformat=VCF"
    - path: output/gatk4/test.reblock.g.vcf.gz.tbi
      md5sum: e1aab7d826a151828fd0671ec5aed2e0

- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals
  command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
  tags:
    - gatk4/reblockgvcf
    - gatk4
  files:
    - path: output/gatk4/test.reblock.g.vcf.gz
      contains:
        - "##fileformat=VCF"
    - path: output/gatk4/test.reblock.g.vcf.gz.tbi
      md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4

- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp
  command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
  tags:
    - gatk4/reblockgvcf
    - gatk4
  files:
    - path: output/gatk4/test.reblock.g.vcf.gz
      contains:
        - "##fileformat=VCF"
    - path: output/gatk4/test.reblock.g.vcf.gz.tbi
      md5sum: 017edea27a253eb51cc4505d00dcb295