From 61376425fb7996d38ae9ac78dfd0dafd4ac7b8a5 Mon Sep 17 00:00:00 2001 From: GCJMackenzie <43276267+GCJMackenzie@users.noreply.github.com> Date: Tue, 14 Dec 2021 16:10:08 +0000 Subject: [PATCH] BugFix: add missing tbi output channel to genotypeGVCFs (#1153) * initial commit to setup branch * workflow finished * Update nextflow.config * tumour to tumor, getpileup passed as normal and tumor * paired_somatic renamed to tumor_normal_somatic * Apply suggestions from code review Co-authored-by: Maxime U. Garcia * Update subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf Co-authored-by: Maxime U. Garcia * updated index names in meta.yml * changed index file names in main script and test * Apply suggestions from code review Co-authored-by: Maxime U. Garcia * Apply suggestions from code review * fixed bug from changes * Apply suggestions from code review * modified yml to allow new subworkflow testing * Update test.yml * Update test.yml * added output channel for tbi files, tweaked method of adding blank inputs for gendb tests * Update main.nf * Update main.nf Co-authored-by: GCJMackenzie Co-authored-by: Maxime U. Garcia --- modules/gatk4/genotypegvcfs/main.nf | 7 +-- modules/gatk4/genotypegvcfs/meta.yml | 4 ++ tests/config/test_data.config | 2 +- tests/modules/gatk4/genotypegvcfs/main.nf | 56 ++++++++-------------- tests/modules/gatk4/genotypegvcfs/test.yml | 9 ++++ 5 files changed, 38 insertions(+), 40 deletions(-) diff --git a/modules/gatk4/genotypegvcfs/main.nf b/modules/gatk4/genotypegvcfs/main.nf index 1a772860..50a6e188 100644 --- a/modules/gatk4/genotypegvcfs/main.nf +++ b/modules/gatk4/genotypegvcfs/main.nf @@ -2,10 +2,10 @@ process GATK4_GENOTYPEGVCFS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0' : - 'quay.io/biocontainers/gatk4:4.2.0.0--0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" input: tuple val(meta), path(gvcf), path(gvcf_index) @@ -18,6 +18,7 @@ process GATK4_GENOTYPEGVCFS { output: tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi path "versions.yml" , emit: versions script: diff --git a/modules/gatk4/genotypegvcfs/meta.yml b/modules/gatk4/genotypegvcfs/meta.yml index cd7457a7..e6b38863 100644 --- a/modules/gatk4/genotypegvcfs/meta.yml +++ b/modules/gatk4/genotypegvcfs/meta.yml @@ -60,6 +60,10 @@ output: type: file description: Genotyped VCF file pattern: "*.vcf.gz" + - tbi: + type: file + description: Tbi index for VCF file + pattern: "*.tbi" - versions: type: file description: File containing software versions diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 31e17618..b2edcb6f 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -193,9 +193,9 @@ params { test2_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table" test_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table" test2_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" - test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" + test_pon_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" test_test2_paired_mutect2_calls_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz" test_test2_paired_mutect2_calls_vcf_gz_tbi = 
"${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi" diff --git a/tests/modules/gatk4/genotypegvcfs/main.nf b/tests/modules/gatk4/genotypegvcfs/main.nf index 208faf8b..ce13ea2d 100644 --- a/tests/modules/gatk4/genotypegvcfs/main.nf +++ b/tests/modules/gatk4/genotypegvcfs/main.nf @@ -7,7 +7,7 @@ include { UNTAR } from '../../../../modules/untar/main.nf' // Basic parameters with uncompressed VCF input workflow test_gatk4_genotypegvcfs_vcf_input { - + input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true) ] @@ -21,7 +21,7 @@ workflow test_gatk4_genotypegvcfs_vcf_input { // Basic parameters with compressed VCF input workflow test_gatk4_genotypegvcfs_gz_input { - + input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ] @@ -35,7 +35,7 @@ workflow test_gatk4_genotypegvcfs_gz_input { // Basic parameters + optional dbSNP workflow test_gatk4_genotypegvcfs_gz_input_dbsnp { - + input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ] @@ -52,7 +52,7 @@ workflow test_gatk4_genotypegvcfs_gz_input_dbsnp { // Basic parameters + optional intervals workflow test_gatk4_genotypegvcfs_gz_input_intervals { - + input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ] @@ -68,7 +68,7 @@ workflow test_gatk4_genotypegvcfs_gz_input_intervals { // 
Basic parameters + optional dbSNP + optional intervals workflow test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals { - + input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ] @@ -95,20 +95,16 @@ workflow test_gatk4_genotypegvcfs_gendb_input { test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) UNTAR ( test_genomicsdb ) - - Channel.of(file("mock_gvcf_index.txt")).set{mock_gvcf_index} - Channel - .of([ id:'test' ]) - .combine(UNTAR.out.untar) - .combine(mock_gvcf_index) - .set{ input } + gendb = UNTAR.out.untar.collect() + gendb.add([]) + input = Channel.of([ id:'test' ]).combine(gendb) GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [], [] ) } // Basic parameters with GenomicsDB + optional dbSNP workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp { - + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) @@ -119,20 +115,16 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp { test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) UNTAR ( test_genomicsdb ) - - Channel.of(file("mock_gvcf_index.txt")).set{mock_gvcf_index} - Channel - .of([ id:'test' ]) - .combine(UNTAR.out.untar) - .combine(mock_gvcf_index) - .set{ input } + gendb = UNTAR.out.untar.collect() + gendb.add([]) + input = Channel.of([ id:'test' ]).combine(gendb) GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex, [] ) } // Basic parameters with GenomicsDB + optional intervals workflow test_gatk4_genotypegvcfs_gendb_input_intervals 
{ - + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) @@ -142,20 +134,16 @@ workflow test_gatk4_genotypegvcfs_gendb_input_intervals { test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) UNTAR ( test_genomicsdb ) - - Channel.of(file("mock_gvcf_index.txt")).set{mock_gvcf_index} - Channel - .of([ id:'test' ]) - .combine(UNTAR.out.untar) - .combine(mock_gvcf_index) - .set{ input } + gendb = UNTAR.out.untar.collect() + gendb.add([]) + input = Channel.of([ id:'test' ]).combine(gendb) GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [], intervalsBed ) } // Basic parameters with GenomicsDB + optional dbSNP + optional intervals workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals { - + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) @@ -168,13 +156,9 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals { test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) UNTAR ( test_genomicsdb ) - - Channel.of(file("mock_gvcf_index.txt")).set{mock_gvcf_index} - Channel - .of([ id:'test' ]) - .combine(UNTAR.out.untar) - .combine(mock_gvcf_index) - .set{ input } + gendb = UNTAR.out.untar.collect() + gendb.add([]) + input = Channel.of([ id:'test' ]).combine(gendb) GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex, intervalsBed ) } diff --git a/tests/modules/gatk4/genotypegvcfs/test.yml 
b/tests/modules/gatk4/genotypegvcfs/test.yml index 45201af2..891bc365 100644 --- a/tests/modules/gatk4/genotypegvcfs/test.yml +++ b/tests/modules/gatk4/genotypegvcfs/test.yml @@ -6,6 +6,7 @@ files: - path: output/gatk4/test.genotyped.vcf.gz contains: ['AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680'] + - path: output/gatk4/test.genotyped.vcf.gz.tbi - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -15,6 +16,7 @@ files: - path: output/gatk4/test.genotyped.vcf.gz contains: ['AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680'] + - path: output/gatk4/test.genotyped.vcf.gz.tbi - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input_dbsnp command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -24,6 +26,7 @@ files: - path: output/gatk4/test.genotyped.vcf.gz contains: ['AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DB;DP=211;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680'] + - path: output/gatk4/test.genotyped.vcf.gz.tbi - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input_intervals command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -33,6 +36,7 @@ files: - path: output/gatk4/test.genotyped.vcf.gz contains: 
['AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680'] + - path: output/gatk4/test.genotyped.vcf.gz.tbi - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -42,6 +46,7 @@ files: - path: output/gatk4/test.genotyped.vcf.gz contains: ['AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DB;DP=211;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680'] + - path: output/gatk4/test.genotyped.vcf.gz.tbi - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -51,6 +56,7 @@ files: - path: output/gatk4/test.genotyped.vcf.gz contains: ['AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680'] + - path: output/gatk4/test.genotyped.vcf.gz.tbi - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input_dbsnp command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -60,6 +66,7 @@ files: - path: output/gatk4/test.genotyped.vcf.gz contains: ['AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DB;DP=211;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680'] + - path: output/gatk4/test.genotyped.vcf.gz.tbi - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input_intervals command: nextflow run ./tests/modules/gatk4/genotypegvcfs 
-entry test_gatk4_genotypegvcfs_gendb_input_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -69,6 +76,7 @@ files: - path: output/gatk4/test.genotyped.vcf.gz contains: ['AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680'] + - path: output/gatk4/test.genotyped.vcf.gz.tbi - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -78,3 +86,4 @@ files: - path: output/gatk4/test.genotyped.vcf.gz contains: ['AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DB;DP=211;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680'] + - path: output/gatk4/test.genotyped.vcf.gz.tbi