mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Add module: gatk4/leftalignandtrimvariants (#1808)
* Added: gatk4/leftalignandtrimvariants Additions: - GATK4/LeftAlignAndTrimVariants module - Use SARS-CoV-2 (sarscov2) test data, as this normalises a larger INDEL correctly. Fixes #1801 * fixup: Added index to output spec * fixup: Pattern of tbi output corrected to 'tbi' * gatk4/leftalignandtrimvariants: Added intervals Changes: - gatk4/leftalignandtrimvariants now supports an optional interval as a BED file - Tests added with and without interval. Note: the test BED file excludes all variants, so no variants are actually normalised. Fixes #1801 * fixup: leftalignandtrimvariants vcf->tbi fix * fixup: gatk4/leftalignandtrimvariants Intervals added to meta.yml
This commit is contained in:
parent
b573ff053e
commit
009f7c691c
6 changed files with 187 additions and 0 deletions
48
modules/gatk4/leftalignandtrimvariants/main.nf
Normal file
48
modules/gatk4/leftalignandtrimvariants/main.nf
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
// Runs `gatk LeftAlignAndTrimVariants` on one VCF per sample.
// Inputs:  a (meta, vcf, tbi, intervals) tuple plus the reference fasta, fai and dict.
// Outputs: `*.vcf.gz` and `*.tbi` files (each paired with meta) and a versions.yml.
process GATK4_LEFTALIGNANDTRIMVARIANTS {
|
||||||
|
tag "$meta.id"
|
||||||
|
label 'process_low'
|
||||||
|
|
||||||
|
conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
|
||||||
|
// Container selection: the Galaxy depot Singularity image is used when the engine is
// Singularity and ext.singularity_pull_docker_container is not set; otherwise the
// quay.io biocontainers image is used.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||||
|
'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
|
||||||
|
'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
|
||||||
|
|
||||||
|
input:
|
||||||
|
// `intervals` is optional in practice: a falsy value (e.g. an empty list) makes the
// script omit the --intervals argument.
tuple val(meta), path(vcf), path(tbi), path(intervals)
|
||||||
|
path fasta
|
||||||
|
// fai and dict are staged alongside the fasta; the command line below does not
// reference them by name.
path fai
|
||||||
|
path dict
|
||||||
|
|
||||||
|
output:
|
||||||
|
tuple val(meta), path("*.vcf.gz"), emit: vcf
|
||||||
|
tuple val(meta), path("*.tbi") , emit: tbi
|
||||||
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
|
when:
|
||||||
|
// Runs unless ext.when is set to a falsy value in the configuration.
task.ext.when == null || task.ext.when
|
||||||
|
|
||||||
|
script:
|
||||||
|
// Extra command-line arguments supplied through ext.args (empty by default).
def args = task.ext.args ?: ''
|
||||||
|
// Output file name prefix; defaults to the sample id from meta.
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
|
// Only pass --intervals when an intervals file was actually provided.
def interval_command = intervals ? "--intervals $intervals" : ""
|
||||||
|
// Value used for the JVM -Xmx option (in G): 3 unless task.memory is defined,
// in which case task.memory.toGiga() is used.
def avail_mem = 3
|
||||||
|
if (!task.memory) {
|
||||||
|
log.info '[GATK LeftAlignAndTrimVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||||
|
} else {
|
||||||
|
avail_mem = task.memory.toGiga()
|
||||||
|
}
|
||||||
|
// The shell script below runs gatk and then writes versions.yml, with the gatk4
// version extracted from the `gatk --version` output via sed.
"""
|
||||||
|
gatk --java-options "-Xmx${avail_mem}G" LeftAlignAndTrimVariants \\
|
||||||
|
$interval_command \\
|
||||||
|
--variant $vcf \\
|
||||||
|
--output ${prefix}.vcf.gz \\
|
||||||
|
--reference $fasta \\
|
||||||
|
--tmp-dir . \\
|
||||||
|
$args
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
}
|
69
modules/gatk4/leftalignandtrimvariants/meta.yml
Normal file
69
modules/gatk4/leftalignandtrimvariants/meta.yml
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
name: "gatk4_leftalignandtrimvariants"
|
||||||
|
description: Left align and trim variants using GATK4 LeftAlignAndTrimVariants.
|
||||||
|
keywords:
|
||||||
|
- normalize
|
||||||
|
- norm
|
||||||
|
- vcf
|
||||||
|
tools:
|
||||||
|
- gatk4:
|
||||||
|
description: |
|
||||||
|
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
|
||||||
|
with a primary focus on variant discovery and genotyping. Its powerful processing engine
|
||||||
|
and high-performance computing features make it capable of taking on projects of any size.
|
||||||
|
homepage: https://gatk.broadinstitute.org/hc/en-us
|
||||||
|
documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
|
||||||
|
doi: 10.1158/1538-7445.AM2017-3590
|
||||||
|
licence: ["Apache-2.0"]
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- vcf:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
The vcf file to be normalized
|
||||||
|
e.g. 'file1.vcf.gz'
|
||||||
|
- tbi:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Index of the vcf file to be normalized
|
||||||
|
e.g. 'file1.vcf.gz.tbi'
|
||||||
|
- intervals:
|
||||||
|
type: file
|
||||||
|
description: Bed file with the genomic regions included in the library (optional)
|
||||||
|
- fasta:
|
||||||
|
type: file
|
||||||
|
description: The reference fasta file
|
||||||
|
pattern: "*.fasta"
|
||||||
|
- fai:
|
||||||
|
type: file
|
||||||
|
description: Index of reference fasta file
|
||||||
|
pattern: "*.fasta.fai"
|
||||||
|
- dict:
|
||||||
|
type: file
|
||||||
|
description: GATK sequence dictionary
|
||||||
|
pattern: "*.dict"
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- vcf:
|
||||||
|
type: file
|
||||||
|
description: VCF normalized output file
|
||||||
|
pattern: "*.vcf.gz"
|
||||||
|
- tbi:
|
||||||
|
type: file
|
||||||
|
description: Tbi index for VCF file
|
||||||
|
pattern: "*.tbi"
|
||||||
|
authors:
|
||||||
|
- "@adamrtalbot"
|
|
@ -863,6 +863,10 @@ gatk4/learnreadorientationmodel:
|
||||||
- modules/gatk4/learnreadorientationmodel/**
|
- modules/gatk4/learnreadorientationmodel/**
|
||||||
- tests/modules/gatk4/learnreadorientationmodel/**
|
- tests/modules/gatk4/learnreadorientationmodel/**
|
||||||
|
|
||||||
|
gatk4/leftalignandtrimvariants:
|
||||||
|
- modules/gatk4/leftalignandtrimvariants/**
|
||||||
|
- tests/modules/gatk4/leftalignandtrimvariants/**
|
||||||
|
|
||||||
gatk4/markduplicates:
|
gatk4/markduplicates:
|
||||||
- modules/gatk4/markduplicates/**
|
- modules/gatk4/markduplicates/**
|
||||||
- tests/modules/gatk4/markduplicates/**
|
- tests/modules/gatk4/markduplicates/**
|
||||||
|
|
35
tests/modules/gatk4/leftalignandtrimvariants/main.nf
Normal file
35
tests/modules/gatk4/leftalignandtrimvariants/main.nf
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { GATK4_LEFTALIGNANDTRIMVARIANTS } from '../../../../modules/gatk4/leftalignandtrimvariants/main.nf'
|
||||||
|
|
||||||
|
// Test entry point: runs GATK4_LEFTALIGNANDTRIMVARIANTS on the sarscov2 test VCF
// with a BED file supplied as the intervals element of the input tuple.
workflow test_gatk4_leftalignandtrimvariants_interval {
|
||||||
|
|
||||||
|
// Tuple layout expected by the module: [ meta, vcf, tbi, intervals ].
input = [ [ id:'test' ], // meta map
|
||||||
|
file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
|
||||||
|
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
|
||||||
|
file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true),
|
||||||
|
]
|
||||||
|
|
||||||
|
// Reference genome, its fasta index and its sequence dictionary.
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
|
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
|
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
|
||||||
|
|
||||||
|
GATK4_LEFTALIGNANDTRIMVARIANTS ( input, fasta, fai, dict )
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test entry point: runs GATK4_LEFTALIGNANDTRIMVARIANTS on the sarscov2 test VCF
// without an intervals file (an empty list is passed in its place).
workflow test_gatk4_leftalignandtrimvariants_no_interval {
|
||||||
|
|
||||||
|
// Tuple layout expected by the module: [ meta, vcf, tbi, intervals ].
input = [ [ id:'test' ], // meta map
|
||||||
|
file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
|
||||||
|
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
|
||||||
|
// Empty list stands in for the optional intervals file.
[]
|
||||||
|
]
|
||||||
|
|
||||||
|
// Reference genome, its fasta index and its sequence dictionary.
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
|
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
|
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
|
||||||
|
|
||||||
|
GATK4_LEFTALIGNANDTRIMVARIANTS ( input, fasta, fai, dict )
|
||||||
|
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
// Test configuration for the gatk4/leftalignandtrimvariants module.
process {
|
||||||
|
|
||||||
|
// Publish under <outdir>/<tool>, where <tool> is derived from the process name:
// the last ':'-separated component, then its first '_'-separated token, lower-cased.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
|
||||||
|
|
||||||
|
// Module-specific settings: extra GATK arguments and a ".normalised" output prefix.
withName: 'GATK4_LEFTALIGNANDTRIMVARIANTS' {
|
||||||
|
ext.args = "--split-multi-allelics --dont-trim-alleles --keep-original-ac"
|
||||||
|
ext.prefix = { "${meta.id}.normalised" }
|
||||||
|
}
|
||||||
|
}
|
22
tests/modules/gatk4/leftalignandtrimvariants/test.yml
Normal file
22
tests/modules/gatk4/leftalignandtrimvariants/test.yml
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
- name: gatk4 leftalignandtrimvariants test_gatk4_leftalignandtrimvariants_interval
|
||||||
|
command: nextflow run ./tests/modules/gatk4/leftalignandtrimvariants -entry test_gatk4_leftalignandtrimvariants_interval -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/leftalignandtrimvariants/nextflow.config
|
||||||
|
tags:
|
||||||
|
- gatk4
|
||||||
|
- gatk4/leftalignandtrimvariants
|
||||||
|
files:
|
||||||
|
- path: output/gatk4/test.normalised.vcf.gz
|
||||||
|
contains:
|
||||||
|
- "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT"
|
||||||
|
- path: output/gatk4/test.normalised.vcf.gz.tbi
|
||||||
|
|
||||||
|
- name: gatk4 leftalignandtrimvariants test_gatk4_leftalignandtrimvariants_no_interval
|
||||||
|
command: nextflow run ./tests/modules/gatk4/leftalignandtrimvariants -entry test_gatk4_leftalignandtrimvariants_no_interval -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/leftalignandtrimvariants/nextflow.config
|
||||||
|
tags:
|
||||||
|
- gatk4
|
||||||
|
- gatk4/leftalignandtrimvariants
|
||||||
|
files:
|
||||||
|
- path: output/gatk4/test.normalised.vcf.gz
|
||||||
|
contains:
|
||||||
|
- "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT"
|
||||||
|
- "MT192765.1\t10502\t.\tTAGATTATGACTGTGTCTCTTTTTGTTACATGCACCA\tTAGAT"
|
||||||
|
- path: output/gatk4/test.normalised.vcf.gz.tbi
|
Loading…
Reference in a new issue