feat: fastaidx -> fai (#1073)

This commit is contained in:
Maxime U. Garcia 2021-11-15 18:03:02 +01:00 committed by GitHub
parent 5b1ce484b9
commit 3426834744
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 47 additions and 42 deletions

View file

@ -21,7 +21,7 @@ process GATK4_APPLYBQSR {
input: input:
tuple val(meta), path(input), path(input_index), path(bqsr_table) tuple val(meta), path(input), path(input_index), path(bqsr_table)
path fasta path fasta
path fastaidx path fai
path dict path dict
path intervals path intervals

View file

@ -34,12 +34,15 @@ input:
- fasta: - fasta:
type: file type: file
description: The reference fasta file description: The reference fasta file
- fastaidx: pattern: "*.fasta"
- fai:
type: file type: file
description: Index of reference fasta file description: Index of reference fasta file
pattern: "*.fasta.fai"
- dict: - dict:
type: file type: file
description: GATK sequence dictionary description: GATK sequence dictionary
pattern: "*.dict"
- intervalsBed: - intervalsBed:
type: file type: file
description: Bed file with the genomic regions included in the library (optional) description: Bed file with the genomic regions included in the library (optional)

View file

@ -21,7 +21,7 @@ process GATK4_BASERECALIBRATOR {
input: input:
tuple val(meta), path(input), path(input_index) tuple val(meta), path(input), path(input_index)
path fasta path fasta
path fastaidx path fai
path dict path dict
path intervalsBed path intervalsBed
path knownSites path knownSites

View file

@ -31,12 +31,15 @@ input:
- fasta: - fasta:
type: file type: file
description: The reference fasta file description: The reference fasta file
- fastaidx: pattern: "*.fasta"
- fai:
type: file type: file
description: Index of reference fasta file description: Index of reference fasta file
pattern: "*.fasta.fai"
- dict: - dict:
type: file type: file
description: GATK sequence dictionary description: GATK sequence dictionary
pattern: "*.dict"
- intervalsBed: - intervalsBed:
type: file type: file
description: Bed file with the genomic regions included in the library (optional) description: Bed file with the genomic regions included in the library (optional)

View file

@ -21,7 +21,7 @@ process GATK4_CREATESOMATICPANELOFNORMALS {
input: input:
tuple val(meta), path(genomicsdb) tuple val(meta), path(genomicsdb)
path fasta path fasta
path fastaidx path fai
path dict path dict
output: output:

View file

@ -28,10 +28,10 @@ input:
type: file type: file
description: The reference fasta file description: The reference fasta file
pattern: "*.fasta" pattern: "*.fasta"
- fastaidx: - fai:
type: file type: file
description: Index of reference fasta file description: Index of reference fasta file
pattern: "fasta.fai" pattern: "*.fasta.fai"
- dict: - dict:
type: file type: file
description: GATK sequence dictionary description: GATK sequence dictionary

View file

@ -21,7 +21,7 @@ process GATK4_FILTERMUTECTCALLS {
input: input:
tuple val(meta), path(vcf), path(tbi), path(stats), path(orientationbias), path(segmentation), path(contaminationfile), val(contaminationest) tuple val(meta), path(vcf), path(tbi), path(stats), path(orientationbias), path(segmentation), path(contaminationfile), val(contaminationest)
path fasta path fasta
path fastaidx path fai
path dict path dict
output: output:

View file

@ -53,10 +53,10 @@ input:
type: file type: file
description: The reference fasta file description: The reference fasta file
pattern: "*.fasta" pattern: "*.fasta"
- fastaidx: - fai:
type: file type: file
description: Index of reference fasta file description: Index of reference fasta file
pattern: "fasta.fai" pattern: "*.fasta.fai"
- dict: - dict:
type: file type: file
description: GATK sequence dictionary description: GATK sequence dictionary

View file

@ -25,7 +25,7 @@ process GATK4_MUTECT2 {
val run_mito val run_mito
val interval_label val interval_label
path fasta path fasta
path fastaidx path fai
path dict path dict
path germline_resource path germline_resource
path germline_resource_tbi path germline_resource_tbi

View file

@ -54,10 +54,10 @@ input:
type: file type: file
description: The reference fasta file description: The reference fasta file
pattern: "*.fasta" pattern: "*.fasta"
- fastaidx: - fai:
type: file type: file
description: Index of reference fasta file description: Index of reference fasta file
pattern: "fasta.fai" pattern: "*.fasta.fai"
- dict: - dict:
type: file type: file
description: GATK sequence dictionary description: GATK sequence dictionary

View file

@ -14,7 +14,7 @@ workflow GATK_CREATE_SOM_PON {
take: take:
ch_mutect2_in // channel: [ val(meta), [ input ], [ input_index ], [] ] ch_mutect2_in // channel: [ val(meta), [ input ], [ input_index ], [] ]
fasta // channel: /path/to/reference/fasta fasta // channel: /path/to/reference/fasta
fastaidx // channel: /path/to/reference/fasta/index fai // channel: /path/to/reference/fasta/index
dict // channel: /path/to/reference/fasta/dictionary dict // channel: /path/to/reference/fasta/dictionary
pon_name // channel: name for panel of normals pon_name // channel: name for panel of normals
interval_file // channel: /path/to/interval/file interval_file // channel: /path/to/interval/file
@ -25,7 +25,7 @@ workflow GATK_CREATE_SOM_PON {
// //
//Perform variant calling for each sample using mutect2 module in panel of normals mode. //Perform variant calling for each sample using mutect2 module in panel of normals mode.
// //
GATK4_MUTECT2 ( input , false , true, false , [] , fasta , fastaidx , dict , [], [] , [] , [] ) GATK4_MUTECT2 ( input, false, true, false, [], fasta, fai, dict, [], [], [], [] )
ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first()) ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first())
// //
@ -41,7 +41,7 @@ workflow GATK_CREATE_SOM_PON {
//Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals. //Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals.
// //
GATK4_GENOMICSDBIMPORT.out.genomicsdb.view() GATK4_GENOMICSDBIMPORT.out.genomicsdb.view()
GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, fasta, fastaidx, dict ) GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, fasta, fai, dict )
ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first()) ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first())
emit: emit:

View file

@ -30,10 +30,10 @@ input:
type: file type: file
description: The reference fasta file description: The reference fasta file
pattern: "*.fasta" pattern: "*.fasta"
- fastaidx: - fai:
type: file type: file
description: Index of reference fasta file description: Index of reference fasta file
pattern: "fasta.fai" pattern: "*.fasta.fai"
- dict: - dict:
type: file type: file
description: GATK sequence dictionary description: GATK sequence dictionary

View file

@ -18,10 +18,10 @@ workflow test_gatk4_filtermutectcalls_base {
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_FILTERMUTECTCALLS ( input, fasta, fastaidx, dict ) GATK4_FILTERMUTECTCALLS ( input, fasta, fai, dict )
} }
workflow test_gatk4_filtermutectcalls_with_files { workflow test_gatk4_filtermutectcalls_with_files {
@ -38,10 +38,10 @@ workflow test_gatk4_filtermutectcalls_with_files {
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_FILTERMUTECTCALLS ( input, fasta, fastaidx, dict ) GATK4_FILTERMUTECTCALLS ( input, fasta, fai, dict )
} }
workflow test_gatk4_filtermutectcalls_use_val { workflow test_gatk4_filtermutectcalls_use_val {
@ -58,8 +58,8 @@ workflow test_gatk4_filtermutectcalls_use_val {
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_FILTERMUTECTCALLS ( input, fasta, fastaidx, dict ) GATK4_FILTERMUTECTCALLS ( input, fasta, fai, dict )
} }

View file

@ -8,8 +8,8 @@ include { GATK4_MUTECT2 as GATK4_TEMPFIX_MUTECT2 } from '../../../../modules/gat
workflow test_gatk4_mutect2_tumor_normal_pair { workflow test_gatk4_mutect2_tumor_normal_pair {
input = [ [ id:'test'], // meta map input = [ [ id:'test'], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
["testN"] ["testN"]
] ]
run_single = false run_single = false
@ -17,14 +17,14 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
run_mito = false run_mito = false
interval_label = [] interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true) germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true)
germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true) germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true)
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
GATK4_TEMPFIX_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) GATK4_TEMPFIX_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
} }
workflow test_gatk4_mutect2_tumor_single { workflow test_gatk4_mutect2_tumor_single {
@ -38,14 +38,14 @@ workflow test_gatk4_mutect2_tumor_single {
run_mito = false run_mito = false
interval_label = [] interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true) germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true)
germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true) germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true)
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
} }
workflow test_gatk4_mutect2_cram_input { workflow test_gatk4_mutect2_cram_input {
@ -59,14 +59,14 @@ workflow test_gatk4_mutect2_cram_input {
run_mito = false run_mito = false
interval_label = [] interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true) germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true)
germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true) germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true)
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
} }
workflow test_gatk4_mutect2_generate_pon { workflow test_gatk4_mutect2_generate_pon {
@ -80,14 +80,14 @@ workflow test_gatk4_mutect2_generate_pon {
run_mito = false run_mito = false
interval_label = [] interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
germline_resource = [] germline_resource = []
germline_resource_tbi = [] germline_resource_tbi = []
panel_of_normals = [] panel_of_normals = []
panel_of_normals_tbi = [] panel_of_normals_tbi = []
GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
} }
// mitochondria mode would ideally have some mitochondria test data, but since the mitochondria settings only increase detection sensitivity, we can use the chr22 data as a stand in as it is already a small dataset, the extra variants detected compared to generate_pon shows the mode is working. // mitochondria mode would ideally have some mitochondria test data, but since the mitochondria settings only increase detection sensitivity, we can use the chr22 data as a stand in as it is already a small dataset, the extra variants detected compared to generate_pon shows the mode is working.
@ -102,12 +102,12 @@ workflow test_gatk4_mutect2_mitochondria {
run_mito = true run_mito = true
interval_label = 'chr22' interval_label = 'chr22'
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
germline_resource = [] germline_resource = []
germline_resource_tbi = [] germline_resource_tbi = []
panel_of_normals = [] panel_of_normals = []
panel_of_normals_tbi = [] panel_of_normals_tbi = []
GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, interval_label, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
} }

View file

@ -16,11 +16,10 @@ workflow test_gatk_create_som_pon {
[] ] [] ]
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
pon_name = "test_panel" pon_name = "test_panel"
interval_file = file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) interval_file = file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true)
GATK_CREATE_SOM_PON ( ch_mutect2_in, fasta, fastaidx, dict, pon_name, interval_file ) GATK_CREATE_SOM_PON ( ch_mutect2_in, fasta, fai, dict, pon_name, interval_file )
} }