Rework how resource files and labels are passed to GATK4_VARIANTRECALIBRATOR.

Reviewer notes:
  * modules/gatk4/variantrecalibrator/main.nf: the input
    `tuple path(vcfs), path(tbis), val(labels)` is split into `path resource_vcf`,
    `path resource_tbi` and `val labels`. The module no longer prepends
    `--resource:` itself; callers pass labels that already carry that tag, and
    the joined labels are placed after `--tmp-dir .` on the command line.
    `labels` is wrapped as `[labels].flatten()` before joining so that a single
    string is accepted as well as a list, which is what the input comment states.
  * modules/gatk4/variantrecalibrator/meta.yml: documents resource_vcf,
    resource_tbi and labels; removes the allelespecific, resvcfs, restbis,
    reslabels, annotation, mode and rscript entries; adds an author.
  * tests: pass the resource vcfs, tbis and labels as three separate arguments
    and spell the mode option as `-mode` in ext.args.
  * Hunk line counts are unchanged. The `index` blob ids are kept as found and
    were not recomputed for the edited `+` lines.
  * NOTE(review): indentation and blank context lines were reconstructed from a
    whitespace-collapsed copy of this patch; verify against the target tree
    before applying.

diff --git a/modules/gatk4/variantrecalibrator/main.nf b/modules/gatk4/variantrecalibrator/main.nf
index 120aeade..961e60d8 100644
--- a/modules/gatk4/variantrecalibrator/main.nf
+++ b/modules/gatk4/variantrecalibrator/main.nf
@@ -8,8 +8,10 @@ process GATK4_VARIANTRECALIBRATOR {
         'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
 
     input:
-    tuple val(meta), path(vcf), path(tbi)
-    tuple path(vcfs), path(tbis), val(labels)
+    tuple val(meta), path(vcf), path(tbi) // input vcf and tbi of variants to recalibrate
+    path resource_vcf   // resource vcf
+    path resource_tbi   // resource tbi
+    val labels          // string (or list of strings) containing dedicated resource labels already formatted with '--resource:' tag
     path fasta
     path fai
     path dict
@@ -28,7 +30,7 @@ process GATK4_VARIANTRECALIBRATOR {
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     def reference_command = fasta ? "--reference $fasta " : ''
-    def resource_command = labels.collect{"--resource:$it"}.join(' ')
+    def labels_command = [labels].flatten().join(' ') // wrap + flatten so a single label string works as well as a list
 
     def avail_mem = 3
     if (!task.memory) {
@@ -42,8 +44,8 @@ process GATK4_VARIANTRECALIBRATOR {
         --output ${prefix}.recal \\
         --tranches-file ${prefix}.tranches \\
         $reference_command \\
-        $resource_command \\
         --tmp-dir . \\
+        $labels_command \\
         $args
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/gatk4/variantrecalibrator/meta.yml b/modules/gatk4/variantrecalibrator/meta.yml
index afe33d7a..6ed3a40e 100644
--- a/modules/gatk4/variantrecalibrator/meta.yml
+++ b/modules/gatk4/variantrecalibrator/meta.yml
@@ -33,6 +33,17 @@ input:
       type: file
       description: tbi file matching with -vcf
       pattern: "*.vcf.gz.tbi"
+  - resource_vcf:
+      type: file
+      description: all resource vcf files that are used with the corresponding '--resource' label
+      pattern: "*.vcf.gz"
+  - resource_tbi:
+      type: file
+      description: all resource tbi files that are used with the corresponding '--resource' label
+      pattern: "*.vcf.gz.tbi"
+  - labels:
+      type: string
+      description: a string (or list of strings), each a complete GATK VariantRecalibrator resource argument already prefixed with '--resource:' and naming one of the resource files provided. More information can be found at https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator
   - fasta:
       type: file
       description: The reference fasta file
@@ -45,34 +56,6 @@ input:
       type: file
       description: GATK sequence dictionary
       pattern: "*.dict"
-  - allelespecific:
-      type: boolean
-      description: specify whether to use allele specific annotations
-      pattern: "{true,false}"
-  - resvcfs:
-      type: list
-      description: resource files to be used as truth, training and known sites resources, this imports the files into the module, file names are specified again in the resource_labels to be called via the command.
-      pattern: "*/hapmap_3.3.hg38_chr21.vcf.gz"
-  - restbis:
-      type: list
-      description: tbis for the corresponding vcfs files to be used as truth, training and known resources.
-      pattern: "*/hapmap_3.3.hg38_chr21.vcf.gz.tbi"
-  - reslabels:
-      type: list
-      description: labels for the resource files to be used as truth, training and known sites resources, label should include an identifier,which kind of resource(s) it is, prior value and name of the file.
-      pattern: "hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38_chr21.vcf.gz"
-  - annotation:
-      type: list
-      description: specify which annotations should be used for calculations.
-      pattern: "['QD', 'MQ', 'FS', 'SOR']"
-  - mode:
-      type: string
-      description: specifies which recalibration mode to employ (SNP is default, BOTH is intended for testing only)
-      pattern: "{SNP,INDEL,BOTH}"
-  - rscript:
-      type: boolean
-      description: specify whether to generate rscript.plot output file
-      pattern: "{true,false}"
 output:
   - recal:
       type: file
@@ -96,3 +79,4 @@ output:
       pattern: "*.versions.yml"
 authors:
   - "@GCJMackenzie"
+  - "@nickhsmith"
diff --git a/tests/modules/gatk4/variantrecalibrator/main.nf b/tests/modules/gatk4/variantrecalibrator/main.nf
index 66dde5dd..6ece09eb 100644
--- a/tests/modules/gatk4/variantrecalibrator/main.nf
+++ b/tests/modules/gatk4/variantrecalibrator/main.nf
@@ -12,28 +12,30 @@ workflow test_gatk4_variantrecalibrator {
         file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true)
     ]
 
-    resources = [[
+    resources_vcf = [
         file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true)
-    ], [
+    ]
+    resources_tbi = [
         file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true)
-    ], [
-        'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
-        'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
-        '1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
-        'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
-    ]]
+    ]
+    labels = [
+        '--resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
+        '--resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
+        '--resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
+        '--resource:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
+    ]
 
     fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
     fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
     dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
 
-    GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY(input, resources, fasta, fai, dict)
+    GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY(input, resources_vcf, resources_tbi, labels, fasta, fai, dict)
 }
 
 workflow test_gatk4_variantrecalibrator_allele_specific {
@@ -43,26 +45,28 @@ workflow test_gatk4_variantrecalibrator_allele_specific {
         file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true)
     ]
 
-    resources = [[
+    resources_vcf = [
         file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true)
-    ], [
+    ]
+    resources_tbi = [
         file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true),
         file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true)
-    ], [
-        'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
-        'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
-        '1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
-        'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
-    ]]
+    ]
+    labels = [
+        '--resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
+        '--resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
+        '--resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
+        '--resource:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
+    ]
 
     fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
     fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
     dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
 
-    GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY(input, resources, fasta, fai, dict)
+    GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY(input, resources_vcf, resources_tbi, labels, fasta, fai, dict)
 }
diff --git a/tests/modules/gatk4/variantrecalibrator/nextflow.config b/tests/modules/gatk4/variantrecalibrator/nextflow.config
index 6c3a9116..e51b9044 100644
--- a/tests/modules/gatk4/variantrecalibrator/nextflow.config
+++ b/tests/modules/gatk4/variantrecalibrator/nextflow.config
@@ -3,10 +3,10 @@ process {
     publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
 
     withName: GATK4_VARIANTRECALIBRATOR {
-        ext.args = '--mode SNP -an QD -an MQ -an FS -an SOR'
+        ext.args = '-mode SNP -an QD -an MQ -an FS -an SOR'
     }
 
     withName: GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY {
-        ext.args = '--mode SNP -an QD -an MQ -an FS -AS'
+        ext.args = '-mode SNP -an QD -an MQ -an FS -AS'
     }
 }