Merge pull request #1717 from jfy133/gatk-realigntargetcreator

Add gatk/realignertargetcreator
This commit is contained in:
James A. Fellows Yates 2022-06-08 20:27:39 +02:00 committed by GitHub
commit f41712d2ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 152 additions and 0 deletions

View file

@ -0,0 +1,53 @@
// Runs GATK3 RealignerTargetCreator on one alignment file and emits the
// resulting "<prefix>.intervals" file together with a versions.yml record.
//
// Inputs:
//   meta, input, index - sample meta map, alignment file and its index
//   fasta, fai, dict   - reference sequence, its index and its dictionary
//   known_vcf          - optional known-variants file; pass [] to omit "-known"
// Outputs:
//   intervals - tuple of meta and the "*.intervals" file
//   versions  - versions.yml with the reported gatk3 version
process GATK_REALIGNERTARGETCREATOR {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::gatk=3.5" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11':
        'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }"

    input:
    tuple val(meta), path(input), path(index)
    path fasta
    path fai
    path dict
    path known_vcf

    output:
    tuple val(meta), path("*.intervals"), emit: intervals
    path "versions.yml"                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // An empty known_vcf (e.g. []) is falsy, so the -known flag is dropped entirely.
    // NOTE(review): a list of several VCFs would render as one "-known a b" argument - confirm intended usage.
    def known = known_vcf ? "-known ${known_vcf}" : ""

    // The only file written is "<prefix>.intervals", so that is the name that must
    // not collide with the staged input (previously compared against "<prefix>.bam",
    // which rejected valid runs and never matched the real output).
    if ("$input" == "${prefix}.intervals") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

    // Heap size is expressed in megabytes: whole-gigabyte rounding would turn
    // any allocation below 1 GB into "-Xmx0g".
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.mega
    }

    """
    gatk3 \\
        -Xmx${avail_mem}M \\
        -T RealignerTargetCreator \\
        -nt ${task.cpus} \\
        -I ${input} \\
        -R ${fasta} \\
        -o ${prefix}.intervals \\
        ${known} \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk: \$(echo \$(gatk3 --version))
    END_VERSIONS
    """
}

View file

@ -0,0 +1,64 @@
# Metadata for the gatk/realignertargetcreator module: the wrapped tool,
# the process inputs and the emitted outputs.
name: "gatk_realignertargetcreator"
description: Generates a list of locations that should be considered for local realignment prior to genotyping.
keywords:
  - bam
  - vcf
  - variant calling
  - indel
  - realignment
  - targets
tools:
  - "gatk":
      description: "The full Genome Analysis Toolkit (GATK) framework, license restricted."
      homepage: "https://gatk.broadinstitute.org/hc/en-us"
      documentation: "https://github.com/broadinstitute/gatk-docs"
      # A real YAML sequence rather than a string that merely looks like a list.
      licence:
        - "https://software.broadinstitute.org/gatk/download/licensing"
        - "BSD"
        - "https://www.broadinstitute.org/gatk/about/#licensing"
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: Sorted and indexed BAM/CRAM/SAM file
      pattern: "*.bam"
  - index:
      type: file
      description: BAM index file
      pattern: "*.bai"
  - fasta:
      type: file
      description: Reference file used to generate BAM file
      pattern: "*.{fasta,fa,fna}"
  - fai:
      type: file
      description: Index of reference file used to generate BAM file
      pattern: "*.fai"
  - dict:
      type: file
      description: GATK dict file for reference
      pattern: "*.dict"
  - known_vcf:
      type: file
      description: Optional input VCF file(s) with known indels
      pattern: "*.vcf"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - intervals:
      type: file
      description: File containing intervals that represent sites of extant and potential indels.
      pattern: "*.intervals"
authors:
  - "@jfy133"

View file

@ -715,6 +715,10 @@ gamma/gamma:
- modules/gamma/gamma/**
- tests/modules/gamma/gamma/**
# Path globs associated with the gatk/realignertargetcreator module:
# its module directory and its test directory.
gatk/realignertargetcreator:
- modules/gatk/realignertargetcreator/**
- tests/modules/gatk/realignertargetcreator/**
gatk/unifiedgenotyper:
- modules/gatk/unifiedgenotyper/**
- tests/modules/gatk/unifiedgenotyper/**

View file

@ -0,0 +1,18 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf'
// Runs GATK_REALIGNERTARGETCREATOR on the sarscov2 paired-end sorted BAM
// with no known-variants file (empty list as the last argument).
workflow test_gatk_realignertargetcreator {

    sarscov2_reads  = params.test_data['sarscov2']['illumina']
    sarscov2_genome = params.test_data['sarscov2']['genome']

    // Reference sequence plus its index and sequence dictionary
    reference_fasta = file(sarscov2_genome['genome_fasta'], checkIfExists: true)
    reference_fai   = file(sarscov2_genome['genome_fasta_fai'], checkIfExists: true)
    reference_dict  = file(sarscov2_genome['genome_dict'], checkIfExists: true)

    // Sample tuple: meta map, sorted BAM, BAM index
    sorted_bam     = file(sarscov2_reads['test_paired_end_sorted_bam'], checkIfExists: true)
    sorted_bam_bai = file(sarscov2_reads['test_paired_end_sorted_bam_bai'], checkIfExists: true)
    sample         = [ [ id:'test' ], sorted_bam, sorted_bam_bai ]

    GATK_REALIGNERTARGETCREATOR ( sample, reference_fasta, reference_fai, reference_dict, [] )
}

View file

@ -0,0 +1,5 @@
// Publish each process's outputs under "<outdir>/<tool>", where <tool> is the
// lower-cased text before the first underscore of the last ':'-separated part
// of the process name.
process {

    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}

View file

@ -0,0 +1,8 @@
# Runs the test_gatk_realignertargetcreator entry workflow and checks the
# checksum of the published intervals file.
- name: gatk realignertargetcreator test_gatk_realignertargetcreator
  command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config
  tags:
    - gatk
    - gatk/realignertargetcreator
  files:
    - path: output/gatk/test.intervals
      md5sum: "7aa7a1b235a510e6591e262382086bf8"