mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 02:58:17 +00:00
Variant recalibration (#1885)
* update tests
* update
* update
* make the manta inputs consistent for germline/somatic/tumoronly
* match chromosomes to cram file (chr21)
* undo genotypegvcfs
* undo genotypegvcfs
* update VariantRecalibrator
* lint
* add '--resource:' tag

Co-authored-by: Smith Nicholas <smith@in.tum.de>
Co-authored-by: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
This commit is contained in:
parent
9deff5222e
commit
edfe28a5e0
4 changed files with 42 additions and 52 deletions
|
@ -8,8 +8,10 @@ process GATK4_VARIANTRECALIBRATOR {
|
|||
'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(vcf), path(tbi)
|
||||
tuple path(vcfs), path(tbis), val(labels)
|
||||
tuple val(meta), path(vcf), path(tbi) // input vcf and tbi of variants to recalibrate
|
||||
path resource_vcf // resource vcf
|
||||
path resource_tbi // resource tbi
|
||||
val labels // string (or list of strings) containing dedicated resource labels already formatted with '--resource:' tag
|
||||
path fasta
|
||||
path fai
|
||||
path dict
|
||||
|
@ -28,7 +30,7 @@ process GATK4_VARIANTRECALIBRATOR {
|
|||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def reference_command = fasta ? "--reference $fasta " : ''
|
||||
def resource_command = labels.collect{"--resource:$it"}.join(' ')
|
||||
def labels_command = labels.join(' ')
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
|
@ -42,8 +44,8 @@ process GATK4_VARIANTRECALIBRATOR {
|
|||
--output ${prefix}.recal \\
|
||||
--tranches-file ${prefix}.tranches \\
|
||||
$reference_command \\
|
||||
$resource_command \\
|
||||
--tmp-dir . \\
|
||||
$labels_command \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -33,6 +33,17 @@ input:
|
|||
type: file
|
||||
description: tbi file matching with -vcf
|
||||
pattern: "*.vcf.gz.tbi"
|
||||
- resource_vcf:
|
||||
type: file
|
||||
description: all resource vcf files that are used with the corresponding '--resource' label
|
||||
pattern: "*.vcf.gz"
|
||||
- resource_tbi:
|
||||
type: file
|
||||
description: all resource tbi files that are used with the corresponding '--resource' label
|
||||
pattern: "*.vcf.gz.tbi"
|
||||
- labels:
|
||||
type: string
|
||||
description: necessary arguments for GATK VariantRecalibrator. Specified to directly match the resources provided. More information can be found at https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
|
@ -45,34 +56,6 @@ input:
|
|||
type: file
|
||||
description: GATK sequence dictionary
|
||||
pattern: "*.dict"
|
||||
- allelespecific:
|
||||
type: boolean
|
||||
description: specify whether to use allele specific annotations
|
||||
pattern: "{true,false}"
|
||||
- resvcfs:
|
||||
type: list
|
||||
description: resource files to be used as truth, training and known sites resources, this imports the files into the module, file names are specified again in the resource_labels to be called via the command.
|
||||
pattern: "*/hapmap_3.3.hg38_chr21.vcf.gz"
|
||||
- restbis:
|
||||
type: list
|
||||
description: tbis for the corresponding vcfs files to be used as truth, training and known resources.
|
||||
pattern: "*/hapmap_3.3.hg38_chr21.vcf.gz.tbi"
|
||||
- reslabels:
|
||||
type: list
|
||||
description: labels for the resource files to be used as truth, training and known sites resources, label should include an identifier,which kind of resource(s) it is, prior value and name of the file.
|
||||
pattern: "hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38_chr21.vcf.gz"
|
||||
- annotation:
|
||||
type: list
|
||||
description: specify which annotations should be used for calculations.
|
||||
pattern: "['QD', 'MQ', 'FS', 'SOR']"
|
||||
- mode:
|
||||
type: string
|
||||
description: specifies which recalibration mode to employ (SNP is default, BOTH is intended for testing only)
|
||||
pattern: "{SNP,INDEL,BOTH}"
|
||||
- rscript:
|
||||
type: boolean
|
||||
description: specify whether to generate rscript.plot output file
|
||||
pattern: "{true,false}"
|
||||
output:
|
||||
- recal:
|
||||
type: file
|
||||
|
@ -96,3 +79,4 @@ output:
|
|||
pattern: "*.versions.yml"
|
||||
authors:
|
||||
- "@GCJMackenzie"
|
||||
- "@nickhsmith"
|
||||
|
|
|
@ -12,28 +12,30 @@ workflow test_gatk4_variantrecalibrator {
|
|||
file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true)
|
||||
]
|
||||
|
||||
resources = [[
|
||||
resources_vcf = [
|
||||
file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true)
|
||||
], [
|
||||
]
|
||||
resources_tbi = [
|
||||
file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true)
|
||||
], [
|
||||
'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
|
||||
'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
|
||||
'1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
|
||||
'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
|
||||
]]
|
||||
]
|
||||
labels = [
|
||||
'--resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
|
||||
'--resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
|
||||
'--resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
|
||||
'--resource:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
|
||||
]
|
||||
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY(input, resources, fasta, fai, dict)
|
||||
GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY(input, resources_vcf, resources_tbi, labels, fasta, fai, dict)
|
||||
}
|
||||
|
||||
workflow test_gatk4_variantrecalibrator_allele_specific {
|
||||
|
@ -43,26 +45,28 @@ workflow test_gatk4_variantrecalibrator_allele_specific {
|
|||
file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true)
|
||||
]
|
||||
|
||||
resources = [[
|
||||
resources_vcf = [
|
||||
file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true)
|
||||
], [
|
||||
]
|
||||
resources_tbi = [
|
||||
file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true)
|
||||
], [
|
||||
'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
|
||||
'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
|
||||
'1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
|
||||
'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
|
||||
]]
|
||||
]
|
||||
labels = [
|
||||
'--resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
|
||||
'--resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
|
||||
'--resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
|
||||
'--resource:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
|
||||
]
|
||||
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY(input, resources, fasta, fai, dict)
|
||||
GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY(input, resources_vcf, resources_tbi, labels, fasta, fai, dict)
|
||||
}
|
||||
|
|
|
@ -3,10 +3,10 @@ process {
|
|||
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
|
||||
|
||||
withName: GATK4_VARIANTRECALIBRATOR {
|
||||
ext.args = '--mode SNP -an QD -an MQ -an FS -an SOR'
|
||||
ext.args = '-mode SNP -an QD -an MQ -an FS -an SOR'
|
||||
}
|
||||
|
||||
withName: GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY {
|
||||
ext.args = '--mode SNP -an QD -an MQ -an FS -AS'
|
||||
ext.args = '-mode SNP -an QD -an MQ -an FS -AS'
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue