Merge pull request #1740 from jfy133/gatk3-indelrealigner

Add Gatk3 indelrealigner
This commit is contained in:
James A. Fellows Yates 2022-06-09 08:40:59 +02:00 committed by GitHub
commit 438f6c5281
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 180 additions and 0 deletions

View file

@ -0,0 +1,54 @@
process GATK_INDELREALIGNER {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::gatk=3.5" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11':
'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }"
input:
tuple val(meta), path(bam), path(bai), path(intervals)
path(fasta)
path(fai)
path(dict)
path(known_vcf)
output:
tuple val(meta), path("*.bam"), path("*.bai"), emit: bam
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def known = known_vcf ? "-known ${known_vcf}" : ""
if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK IndelRealigner] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk3 \\
-Xmx${avail_mem}g \\
-T IndelRealigner \\
-R ${fasta} \\
-I ${bam} \\
--targetIntervals ${intervals} \\
${known} \\
-o ${prefix}.bam \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk: \$(echo \$(gatk3 --version))
END_VERSIONS
"""
}

View file

@ -0,0 +1,71 @@
name: "gatk_indelrealigner"
description: Performs local realignment around indels to correct for mapping errors
keywords:
- bam
- vcf
- variant calling
- indel
- realignment
tools:
- "gatk":
description: "The full Genome Analysis Toolkit (GATK) framework, license restricted."
homepage: "https://gatk.broadinstitute.org/hc/en-us"
documentation: "https://github.com/broadinstitute/gatk-docs"
licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: Sorted and indexed BAM file
pattern: "*.bam"
- bai:
type: file
description: BAM index file
pattern: "*.bai"
- intervals:
type: file
description: Intervals file created by gatk3 RealignerTargetCreator
pattern: "*.{intervals,list}"
- fasta:
type: file
description: Reference file used to generate BAM file
pattern: ".{fasta,fa,fna}"
- fai:
type: file
description: Index of reference file used to generate BAM file
pattern: ".fai"
- dict:
type: file
description: GATK dict file for reference
pattern: ".dict"
- known_vcf:
type: file
description: Optional input VCF file(s) with known indels
pattern: ".vcf"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- bam:
type: file
description: Sorted and indexed BAM file with local realignment around variants
pattern: "*.bam"
- bai:
type: file
description: Output BAM Index file
pattern: "*.bai"
authors:
- "@jfy133"

View file

@ -715,6 +715,10 @@ gamma/gamma:
- modules/gamma/gamma/**
- tests/modules/gamma/gamma/**
gatk/indelrealigner:
- modules/gatk/indelrealigner/**
- tests/modules/gatk/indelrealigner/**
gatk/realignertargetcreator:
- modules/gatk/realignertargetcreator/**
- tests/modules/gatk/realignertargetcreator/**

View file

@ -0,0 +1,33 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf'
include { GATK_INDELREALIGNER } from '../../../../modules/gatk/indelrealigner/main.nf'
workflow test_gatk_indelrealigner {
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
input_realignertargetcreator = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
GATK_REALIGNERTARGETCREATOR ( input_realignertargetcreator, fasta, fai, dict, [] )
ch_intervals = GATK_REALIGNERTARGETCREATOR.out.intervals
ch_bams_indelrealigner = Channel.of([ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
])
ch_input_indelrealigner = ch_bams_indelrealigner.mix(ch_intervals).groupTuple(by: 0).map{ [it[0], it[1][0], it[2], it[1][1] ] }.dump(tag: "input")
GATK_INDELREALIGNER ( ch_input_indelrealigner, fasta, fai, dict, [] )
}

View file

@ -0,0 +1,6 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
ext.prefix = { "${meta.id}.realigned" }
}

View file

@ -0,0 +1,12 @@
- name: gatk indelrealigner test_gatk_indelrealigner
command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config
tags:
- gatk/indelrealigner
- gatk
files:
- path: output/gatk/test.realigned.bai
md5sum: 85a67df8827fe426e7f3a458134c0551
- path: output/gatk/test.realigned.bam
md5sum: ea1df6f7fcafc408fae4dc1574813d8a
- path: output/gatk/test.realigned.intervals
md5sum: 7aa7a1b235a510e6591e262382086bf8