From 34268347447cd60013b69279a42aa9d081592735 Mon Sep 17 00:00:00 2001 From: "Maxime U. Garcia" Date: Mon, 15 Nov 2021 18:03:02 +0100 Subject: [PATCH] feat: fastaidx -> fai (#1073) --- modules/gatk4/applybqsr/main.nf | 2 +- modules/gatk4/applybqsr/meta.yml | 5 +++- modules/gatk4/baserecalibrator/main.nf | 2 +- modules/gatk4/baserecalibrator/meta.yml | 5 +++- .../gatk4/createsomaticpanelofnormals/main.nf | 2 +- .../createsomaticpanelofnormals/meta.yml | 4 ++-- modules/gatk4/filtermutectcalls/main.nf | 2 +- modules/gatk4/filtermutectcalls/meta.yml | 4 ++-- modules/gatk4/mutect2/main.nf | 2 +- modules/gatk4/mutect2/meta.yml | 4 ++-- .../nf-core/gatk_create_som_pon/main.nf | 6 ++--- .../nf-core/gatk_create_som_pon/meta.yml | 4 ++-- tests/modules/gatk4/filtermutectcalls/main.nf | 18 +++++++------- tests/modules/gatk4/mutect2/main.nf | 24 +++++++++---------- .../nf-core/gatk_create_som_pon/main.nf | 5 ++-- 15 files changed, 47 insertions(+), 42 deletions(-) diff --git a/modules/gatk4/applybqsr/main.nf b/modules/gatk4/applybqsr/main.nf index e1a4d7b4..c89a4a4d 100644 --- a/modules/gatk4/applybqsr/main.nf +++ b/modules/gatk4/applybqsr/main.nf @@ -21,7 +21,7 @@ process GATK4_APPLYBQSR { input: tuple val(meta), path(input), path(input_index), path(bqsr_table) path fasta - path fastaidx + path fai path dict path intervals diff --git a/modules/gatk4/applybqsr/meta.yml b/modules/gatk4/applybqsr/meta.yml index e7419860..4e3b2f9a 100644 --- a/modules/gatk4/applybqsr/meta.yml +++ b/modules/gatk4/applybqsr/meta.yml @@ -34,12 +34,15 @@ input: - fasta: type: file description: The reference fasta file - - fastaidx: + pattern: "*.fasta" + - fai: type: file description: Index of reference fasta file + pattern: "*.fasta.fai" - dict: type: file description: GATK sequence dictionary + pattern: "*.dict" - intervalsBed: type: file description: Bed file with the genomic regions included in the library (optional) diff --git a/modules/gatk4/baserecalibrator/main.nf b/modules/gatk4/baserecalibrator/main.nf index ff9eb1f9..ce6f5906 100644 --- a/modules/gatk4/baserecalibrator/main.nf +++ b/modules/gatk4/baserecalibrator/main.nf @@ -21,7 +21,7 @@ process GATK4_BASERECALIBRATOR { input: tuple val(meta), path(input), path(input_index) path fasta - path fastaidx + path fai path dict path intervalsBed path knownSites diff --git a/modules/gatk4/baserecalibrator/meta.yml b/modules/gatk4/baserecalibrator/meta.yml index 7fd273e1..188340b4 100644 --- a/modules/gatk4/baserecalibrator/meta.yml +++ b/modules/gatk4/baserecalibrator/meta.yml @@ -31,12 +31,15 @@ input: - fasta: type: file description: The reference fasta file - - fastaidx: + pattern: "*.fasta" + - fai: type: file description: Index of reference fasta file + pattern: "*.fasta.fai" - dict: type: file description: GATK sequence dictionary + pattern: "*.dict" - intervalsBed: type: file description: Bed file with the genomic regions included in the library (optional) diff --git a/modules/gatk4/createsomaticpanelofnormals/main.nf b/modules/gatk4/createsomaticpanelofnormals/main.nf index b3685171..49136256 100644 --- a/modules/gatk4/createsomaticpanelofnormals/main.nf +++ b/modules/gatk4/createsomaticpanelofnormals/main.nf @@ -21,7 +21,7 @@ process GATK4_CREATESOMATICPANELOFNORMALS { input: tuple val(meta), path(genomicsdb) path fasta - path fastaidx + path fai path dict output: diff --git a/modules/gatk4/createsomaticpanelofnormals/meta.yml b/modules/gatk4/createsomaticpanelofnormals/meta.yml index f0199ed6..e450c68a 100644 --- a/modules/gatk4/createsomaticpanelofnormals/meta.yml +++ b/modules/gatk4/createsomaticpanelofnormals/meta.yml @@ -28,10 +28,10 @@ input: type: file description: The reference fasta file pattern: "*.fasta" - - fastaidx: + - fai: type: file description: Index of reference fasta file - pattern: "fasta.fai" + pattern: "*.fasta.fai" - dict: type: file description: GATK sequence dictionary diff --git a/modules/gatk4/filtermutectcalls/main.nf b/modules/gatk4/filtermutectcalls/main.nf index b54e07ed..6e10ff0f 100644 --- a/modules/gatk4/filtermutectcalls/main.nf +++ b/modules/gatk4/filtermutectcalls/main.nf @@ -21,7 +21,7 @@ process GATK4_FILTERMUTECTCALLS { input: tuple val(meta), path(vcf), path(tbi), path(stats), path(orientationbias), path(segmentation), path(contaminationfile), val(contaminationest) path fasta - path fastaidx + path fai path dict output: diff --git a/modules/gatk4/filtermutectcalls/meta.yml b/modules/gatk4/filtermutectcalls/meta.yml index f14f9404..7d85e2b9 100644 --- a/modules/gatk4/filtermutectcalls/meta.yml +++ b/modules/gatk4/filtermutectcalls/meta.yml @@ -53,10 +53,10 @@ input: type: file description: The reference fasta file pattern: "*.fasta" - - fastaidx: + - fai: type: file description: Index of reference fasta file - pattern: "fasta.fai" + pattern: "*.fasta.fai" - dict: type: file description: GATK sequence dictionary diff --git a/modules/gatk4/mutect2/main.nf b/modules/gatk4/mutect2/main.nf index dd8da406..e0e2661b 100644 --- a/modules/gatk4/mutect2/main.nf +++ b/modules/gatk4/mutect2/main.nf @@ -25,7 +25,7 @@ process GATK4_MUTECT2 { val run_mito val interval_label path fasta - path fastaidx + path fai path dict path germline_resource path germline_resource_tbi diff --git a/modules/gatk4/mutect2/meta.yml b/modules/gatk4/mutect2/meta.yml index 4a49b07a..83f6cb7c 100644 --- a/modules/gatk4/mutect2/meta.yml +++ b/modules/gatk4/mutect2/meta.yml @@ -54,10 +54,10 @@ input: type: file description: The reference fasta file pattern: "*.fasta" - - fastaidx: + - fai: type: file description: Index of reference fasta file - pattern: "fasta.fai" + pattern: "*.fasta.fai" - dict: type: file description: GATK sequence dictionary diff --git a/subworkflows/nf-core/gatk_create_som_pon/main.nf b/subworkflows/nf-core/gatk_create_som_pon/main.nf index 9b190584..40269a4a 100644 --- a/subworkflows/nf-core/gatk_create_som_pon/main.nf +++ b/subworkflows/nf-core/gatk_create_som_pon/main.nf @@ -14,7 +14,7 @@ workflow GATK_CREATE_SOM_PON { take: ch_mutect2_in // channel: [ val(meta), [ input ], [ input_index ], [] ] fasta // channel: /path/to/reference/fasta - fastaidx // channel: /path/to/reference/fasta/index + fai // channel: /path/to/reference/fasta/index dict // channel: /path/to/reference/fasta/dictionary pon_name // channel: name for panel of normals interval_file // channel: /path/to/interval/file @@ -25,7 +25,7 @@ workflow GATK_CREATE_SOM_PON { // //Perform variant calling for each sample using mutect2 module in panel of normals mode. // - GATK4_MUTECT2 ( input , false , true, false , [] , fasta , fastaidx , dict , [], [] , [] , [] ) + GATK4_MUTECT2 ( input, false, true, false, [], fasta, fai, dict, [], [], [], [] ) ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first()) // @@ -41,7 +41,7 @@ workflow GATK_CREATE_SOM_PON { //Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals. // GATK4_GENOMICSDBIMPORT.out.genomicsdb.view() - GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, fasta, fastaidx, dict ) + GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, fasta, fai, dict ) ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first()) emit: diff --git a/subworkflows/nf-core/gatk_create_som_pon/meta.yml b/subworkflows/nf-core/gatk_create_som_pon/meta.yml index bc02b885..07404aae 100644 --- a/subworkflows/nf-core/gatk_create_som_pon/meta.yml +++ b/subworkflows/nf-core/gatk_create_som_pon/meta.yml @@ -30,10 +30,10 @@ input: type: file description: The reference fasta file pattern: "*.fasta" - - fastaidx: + - fai: type: file description: Index of reference fasta file - pattern: "fasta.fai" + pattern: "*.fasta.fai" - dict: type: file description: GATK sequence dictionary diff --git a/tests/modules/gatk4/filtermutectcalls/main.nf b/tests/modules/gatk4/filtermutectcalls/main.nf index a425238b..5b2938e8 100644 --- a/tests/modules/gatk4/filtermutectcalls/main.nf +++ b/tests/modules/gatk4/filtermutectcalls/main.nf @@ -6,7 +6,7 @@ include { GATK4_FILTERMUTECTCALLS } from '../../../../modules/gatk4/filtermutect workflow test_gatk4_filtermutectcalls_base { - input = [ + input = [ [ id:'test'], // meta map file(params.test_data['homo_sapiens']['illumina']['test_test2_paired_mutect2_calls_vcf_gz'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_test2_paired_mutect2_calls_vcf_gz_tbi'], checkIfExists: true), @@ -18,15 +18,15 @@ workflow test_gatk4_filtermutectcalls_base { ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_FILTERMUTECTCALLS ( input, fasta, fastaidx, dict ) + GATK4_FILTERMUTECTCALLS ( input, fasta, fai, dict ) } workflow test_gatk4_filtermutectcalls_with_files { - input = [ + input = [ [ id:'test'], // meta map file(params.test_data['homo_sapiens']['illumina']['test_test2_paired_mutect2_calls_vcf_gz'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_test2_paired_mutect2_calls_vcf_gz_tbi'], checkIfExists: true), @@ -38,15 +38,15 @@ workflow test_gatk4_filtermutectcalls_with_files { ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_FILTERMUTECTCALLS ( input, fasta, fastaidx, dict ) + GATK4_FILTERMUTECTCALLS ( input, fasta, fai, dict ) } workflow test_gatk4_filtermutectcalls_use_val { - input = [ + input = [ [ id:'test'], // meta map file(params.test_data['homo_sapiens']['illumina']['test_test2_paired_mutect2_calls_vcf_gz'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_test2_paired_mutect2_calls_vcf_gz_tbi'], checkIfExists: true), @@ -58,8 +58,8 @@ workflow test_gatk4_filtermutectcalls_use_val { ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_FILTERMUTECTCALLS ( input, fasta, fastaidx, dict ) + GATK4_FILTERMUTECTCALLS ( input, fasta, fai, dict ) } diff --git a/tests/modules/gatk4/mutect2/main.nf b/tests/modules/gatk4/mutect2/main.nf index a3821b64..e163cf9c 100644 --- a/tests/modules/gatk4/mutect2/main.nf +++ b/tests/modules/gatk4/mutect2/main.nf @@ -8,8 +8,8 @@ include { GATK4_MUTECT2 as GATK4_TEMPFIX_MUTECT2 } from '../../../../modules/gat workflow test_gatk4_mutect2_tumor_normal_pair { input = [ [ id:'test'], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], ["testN"] ] run_single = false @@ -17,14 +17,14 @@ workflow test_gatk4_mutect2_tumor_normal_pair { run_mito = false interval_label = [] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true) germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true) panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) - GATK4_TEMPFIX_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) + GATK4_TEMPFIX_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) } workflow test_gatk4_mutect2_tumor_single { @@ -38,14 +38,14 @@ workflow test_gatk4_mutect2_tumor_single { run_mito = false interval_label = [] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true) germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true) panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) - GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) + GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) } workflow test_gatk4_mutect2_cram_input { @@ -59,14 +59,14 @@ workflow test_gatk4_mutect2_cram_input { run_mito = false interval_label = [] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true) germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true) panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) - GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) + GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) } workflow test_gatk4_mutect2_generate_pon { @@ -80,14 +80,14 @@ workflow test_gatk4_mutect2_generate_pon { run_mito = false interval_label = [] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) germline_resource = [] germline_resource_tbi = [] panel_of_normals = [] panel_of_normals_tbi = [] - GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) + GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) } // mitochondria mode would ideally have some mitochondria test data, but since the mitochondria settings only increase detection sensitivity, we can use the chr22 data as a stand in as it is already a small dataset, the extra variants detected compared to generate_pon shows the mode is working. @@ -102,12 +102,12 @@ workflow test_gatk4_mutect2_mitochondria { run_mito = true interval_label = 'chr22' fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) germline_resource = [] germline_resource_tbi = [] panel_of_normals = [] panel_of_normals_tbi = [] - GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) + GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) } diff --git a/tests/subworkflows/nf-core/gatk_create_som_pon/main.nf b/tests/subworkflows/nf-core/gatk_create_som_pon/main.nf index d484ac2f..42427a1f 100644 --- a/tests/subworkflows/nf-core/gatk_create_som_pon/main.nf +++ b/tests/subworkflows/nf-core/gatk_create_som_pon/main.nf @@ -16,11 +16,10 @@ workflow test_gatk_create_som_pon { [] ] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) pon_name = "test_panel" interval_file = file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) - GATK_CREATE_SOM_PON ( ch_mutect2_in, fasta, fastaidx, dict, pon_name, interval_file ) - + GATK_CREATE_SOM_PON ( ch_mutect2_in, fasta, fai, dict, pon_name, interval_file ) }