mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 02:58:17 +00:00
Add module: gatk4/leftalignandtrimvariants (#1808)
* Added: gatk4/leftalignandtrimvariants Additions: - GATK4/LeftAlignAndTrimVariants module - Use sars-ncov2 test data as this normalises a larger INDEL correctly. Fixes #1801 * fixup: Added index to output spec * fixup: Pattern of tbi output corrected to 'tbi' * gatk4/leftalignandtrimvariants: Added intervals Changes: - gatk4/leftalignandtrimvariants now supports optional interval as BED file - Tests added with and without interval. Note: the test BED file excludes all variants, so no variants are actually normalised. Fixes #1801 * fixup: leftalignandtrimvariants vcf->tbi fix * fixup: gatk4/leftalignandtrimvariants Intervals added to meta.yml
This commit is contained in:
parent
b573ff053e
commit
009f7c691c
6 changed files with 187 additions and 0 deletions
48
modules/gatk4/leftalignandtrimvariants/main.nf
Normal file
48
modules/gatk4/leftalignandtrimvariants/main.nf
Normal file
|
@ -0,0 +1,48 @@
|
|||
// Runs GATK4 LeftAlignAndTrimVariants on a bgzipped, tabix-indexed VCF.
//
// Inputs:
//   meta      - Groovy map with sample information; meta.id is used for the tag
//               and as the default output prefix
//   vcf, tbi  - VCF to process and its index
//   intervals - optional intervals file; pass [] to process the whole VCF
//   fasta, fai, dict - reference sequence, its index and sequence dictionary
//
// Outputs:
//   vcf      - ${prefix}.vcf.gz written by GATK
//   tbi      - any *.tbi file produced in the work directory
//   versions - versions.yml recording the GATK version
process GATK4_LEFTALIGNANDTRIMVARIANTS {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
        'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"

    input:
    tuple val(meta), path(vcf), path(tbi), path(intervals)
    path fasta
    path fai
    path dict

    output:
    tuple val(meta), path("*.vcf.gz"), emit: vcf
    tuple val(meta), path("*.tbi")   , emit: tbi
    path "versions.yml"              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // An empty `intervals` input (e.g. []) is falsy, so the option is omitted entirely.
    def interval_command = intervals ? "--intervals $intervals" : ""

    // The input VCF is staged into the same work directory the output is written to.
    // If both resolve to the same file name GATK would overwrite the file it is
    // reading, so fail early with an actionable message instead.
    if ("$vcf" == "${prefix}.vcf.gz") {
        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    }

    // Heap size is expressed in megabytes: whole gigabytes truncate to 0 for
    // allocations below 1 GB, which yields an invalid -Xmx value. Only 80% of the
    // task allocation is given to the heap to leave room for JVM overhead.
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[GATK LeftAlignAndTrimVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega * 0.8).intValue()
    }
    """
    gatk --java-options "-Xmx${avail_mem}M" LeftAlignAndTrimVariants \\
        $interval_command \\
        --variant $vcf \\
        --output ${prefix}.vcf.gz \\
        --reference $fasta \\
        --tmp-dir . \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}
|
69
modules/gatk4/leftalignandtrimvariants/meta.yml
Normal file
69
modules/gatk4/leftalignandtrimvariants/meta.yml
Normal file
|
@ -0,0 +1,69 @@
|
|||
# Module metadata for gatk4/leftalignandtrimvariants: describes the wrapped tool
# and every input/output channel element declared in main.nf.
name: "gatk4_leftalignandtrimvariants"
description: Left align and trim variants using GATK4 LeftAlignAndTrimVariants.
keywords:
  - normalize
  - norm
  - vcf
tools:
  - gatk4:
      description: |
        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
        with a primary focus on variant discovery and genotyping. Its powerful processing engine
        and high-performance computing features make it capable of taking on projects of any size.
      homepage: https://gatk.broadinstitute.org/hc/en-us
      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
      doi: 10.1158/1538-7445.AM2017-3590
      licence: ["Apache-2.0"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: |
        The vcf file to be normalized
        e.g. 'file1.vcf.gz'
      pattern: "*.vcf.gz"
  - tbi:
      type: file
      description: |
        Index of the vcf file to be normalized
        e.g. 'file1.vcf.gz.tbi'
      pattern: "*.vcf.gz.tbi"
  - intervals:
      type: file
      description: Bed file with the genomic regions included in the library (optional)
      pattern: "*.bed"
  - fasta:
      type: file
      description: The reference fasta file
      pattern: "*.fasta"
  - fai:
      type: file
      description: Index of reference fasta file
      pattern: "*.fasta.fai"
  - dict:
      type: file
      description: GATK sequence dictionary
      pattern: "*.dict"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # Patterns below mirror the output globs declared in main.nf.
  - vcf:
      type: file
      description: VCF normalized output file
      pattern: "*.vcf.gz"
  - tbi:
      type: file
      description: Tbi index for VCF file
      pattern: "*.tbi"
authors:
  - "@adamrtalbot"
|
|
@ -863,6 +863,10 @@ gatk4/learnreadorientationmodel:
|
|||
- modules/gatk4/learnreadorientationmodel/**
|
||||
- tests/modules/gatk4/learnreadorientationmodel/**
|
||||
|
||||
gatk4/leftalignandtrimvariants:
|
||||
- modules/gatk4/leftalignandtrimvariants/**
|
||||
- tests/modules/gatk4/leftalignandtrimvariants/**
|
||||
|
||||
gatk4/markduplicates:
|
||||
- modules/gatk4/markduplicates/**
|
||||
- tests/modules/gatk4/markduplicates/**
|
||||
|
|
35
tests/modules/gatk4/leftalignandtrimvariants/main.nf
Normal file
35
tests/modules/gatk4/leftalignandtrimvariants/main.nf
Normal file
|
@ -0,0 +1,35 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { GATK4_LEFTALIGNANDTRIMVARIANTS } from '../../../../modules/gatk4/leftalignandtrimvariants/main.nf'
|
||||
|
||||
// Test entry point: runs the module on the sarscov2 test VCF with a BED file
// supplied as the intervals element of the input tuple.
workflow test_gatk4_leftalignandtrimvariants_interval {

    illumina_data = params.test_data['sarscov2']['illumina']
    genome_data   = params.test_data['sarscov2']['genome']

    // [ meta, vcf, tbi, intervals ]
    vcf_with_intervals = [
        [ id:'test' ], // meta map
        file(illumina_data['test_vcf_gz'], checkIfExists: true),
        file(illumina_data['test_vcf_gz_tbi'], checkIfExists: true),
        file(genome_data['test_bed'], checkIfExists: true)
    ]

    // Reference sequence, its index and its sequence dictionary
    reference_fasta = file(genome_data['genome_fasta'], checkIfExists: true)
    reference_fai   = file(genome_data['genome_fasta_fai'], checkIfExists: true)
    reference_dict  = file(genome_data['genome_dict'], checkIfExists: true)

    GATK4_LEFTALIGNANDTRIMVARIANTS ( vcf_with_intervals, reference_fasta, reference_fai, reference_dict )
}
|
||||
|
||||
// Test entry point: runs the module on the sarscov2 test VCF with an empty
// list in place of the intervals file, so no intervals are passed to the module.
workflow test_gatk4_leftalignandtrimvariants_no_interval {

    illumina_data = params.test_data['sarscov2']['illumina']
    genome_data   = params.test_data['sarscov2']['genome']

    // [ meta, vcf, tbi, intervals ] - intervals left empty on purpose
    vcf_without_intervals = [
        [ id:'test' ], // meta map
        file(illumina_data['test_vcf_gz'], checkIfExists: true),
        file(illumina_data['test_vcf_gz_tbi'], checkIfExists: true),
        []
    ]

    // Reference sequence, its index and its sequence dictionary
    reference_fasta = file(genome_data['genome_fasta'], checkIfExists: true)
    reference_fai   = file(genome_data['genome_fasta_fai'], checkIfExists: true)
    reference_dict  = file(genome_data['genome_dict'], checkIfExists: true)

    GATK4_LEFTALIGNANDTRIMVARIANTS ( vcf_without_intervals, reference_fasta, reference_fai, reference_dict )
}
|
|
@ -0,0 +1,9 @@
|
|||
// Process-scope settings applied when running the module tests.
process {

    // Publish under <outdir>/<x>, where <x> is the lower-cased first
    // '_'-separated token of the last ':'-separated part of the process name.
    publishDir = {
        "${params.outdir}/${task.process.tokenize(':').last().tokenize('_').first().toLowerCase()}"
    }

    withName: 'GATK4_LEFTALIGNANDTRIMVARIANTS' {
        // Output files are named <meta.id>.normalised.*
        ext.prefix = { "${meta.id}.normalised" }
        // Extra command-line options appended to the GATK call
        ext.args   = "--split-multi-allelics --dont-trim-alleles --keep-original-ac"
    }
}
|
22
tests/modules/gatk4/leftalignandtrimvariants/test.yml
Normal file
22
tests/modules/gatk4/leftalignandtrimvariants/test.yml
Normal file
|
@ -0,0 +1,22 @@
|
|||
# pytest-workflow cases for gatk4/leftalignandtrimvariants.
# Each case checks the published VCF, its index and the versions file.

# With the test BED file as intervals: only the VCF column header is asserted.
- name: gatk4 leftalignandtrimvariants test_gatk4_leftalignandtrimvariants_interval
  command: nextflow run ./tests/modules/gatk4/leftalignandtrimvariants -entry test_gatk4_leftalignandtrimvariants_interval -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/leftalignandtrimvariants/nextflow.config
  tags:
    - gatk4
    - gatk4/leftalignandtrimvariants
  files:
    - path: output/gatk4/test.normalised.vcf.gz
      contains:
        - "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT"
    - path: output/gatk4/test.normalised.vcf.gz.tbi
    - path: output/gatk4/versions.yml

# Without intervals: additionally asserts a specific INDEL record in the output.
- name: gatk4 leftalignandtrimvariants test_gatk4_leftalignandtrimvariants_no_interval
  command: nextflow run ./tests/modules/gatk4/leftalignandtrimvariants -entry test_gatk4_leftalignandtrimvariants_no_interval -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/leftalignandtrimvariants/nextflow.config
  tags:
    - gatk4
    - gatk4/leftalignandtrimvariants
  files:
    - path: output/gatk4/test.normalised.vcf.gz
      contains:
        - "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT"
        - "MT192765.1\t10502\t.\tTAGATTATGACTGTGTCTCTTTTTGTTACATGCACCA\tTAGAT"
    - path: output/gatk4/test.normalised.vcf.gz.tbi
    - path: output/gatk4/versions.yml
|
Loading…
Reference in a new issue