mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-21 18:58:16 +00:00
Add gatk somatic paired calling subworkflow (#1067)
* initial commit to setup branch * workflow finished * Update nextflow.config * tumour to tumor, getpileup passed as nomral and tumor * paired_somatic renamed to tumor_normal_somatic * Apply suggestions from code review Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se> * Update subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se> * updated index names in meta.yml * changed index file names in main script and test * Apply suggestions from code review Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se> * Apply suggestions from code review * fixed bug from changes * Apply suggestions from code review * tests should now work after the yml update * Update pytest_modules.yml Co-authored-by: GCJMackenzie <gavin.mackenzie@nibsc.org> Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se> Co-authored-by: Maxime U. Garcia <max.u.garcia@gmail.com>
This commit is contained in:
parent
071b1d50a8
commit
5b975cc20d
6 changed files with 311 additions and 1 deletions
|
@ -0,0 +1,109 @@
|
|||
//
|
||||
// Run GATK mutect2 in tumor normal mode, getpileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls
|
||||
//
|
||||
|
||||
params.mutect2_options = [:]
|
||||
params.learnorientation_options = [:]
|
||||
params.getpileup_tumor_options = [suffix: '_tumor']
|
||||
params.getpileup_normal_options = [suffix: '_normal']
|
||||
params.calccontam_options = [:]
|
||||
params.filtercalls_options = [suffix: '_filtered']
|
||||
|
||||
include { GATK4_MUTECT2 as MUTECT2 } from '../../../modules/gatk4/mutect2/main' addParams( options: params.mutect2_options )
|
||||
include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/gatk4/learnreadorientationmodel/main' addParams( options: params.learnorientation_options )
|
||||
include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/gatk4/getpileupsummaries/main' addParams( options: params.getpileup_tumor_options )
|
||||
include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL} from '../../../modules/gatk4/getpileupsummaries/main' addParams( options: params.getpileup_normal_options )
|
||||
include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/gatk4/calculatecontamination/main' addParams( options: params.calccontam_options )
|
||||
include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/gatk4/filtermutectcalls/main' addParams( options: params.filtercalls_options )
|
||||
|
||||
workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
|
||||
take:
|
||||
input // channel: [ val(meta), [ input ], [ input_index ], [which_norm] ]
|
||||
fasta // channel: /path/to/reference/fasta
|
||||
fai // channel: /path/to/reference/fasta/index
|
||||
dict // channel: /path/to/reference/fasta/dictionary
|
||||
germline_resource // channel: /path/to/germline/resource
|
||||
germline_resource_tbi // channel: /path/to/germline/index
|
||||
panel_of_normals // channel: /path/to/panel/of/normals
|
||||
panel_of_normals_tbi // channel: /path/to/panel/of/normals/index
|
||||
interval_file // channel: /path/to/interval/file
|
||||
|
||||
|
||||
main:
|
||||
ch_versions = Channel.empty()
|
||||
|
||||
//
|
||||
//Perform variant calling using mutect2 module in tumor normal mode.
|
||||
//
|
||||
mutect2_input = channel.from(input)
|
||||
MUTECT2 ( mutect2_input, false, false, false, [], fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
|
||||
ch_versions = ch_versions.mix(MUTECT2.out.versions)
|
||||
|
||||
//
|
||||
//Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2.
|
||||
//
|
||||
ch_learnread_in = MUTECT2.out.f1r2.collect()
|
||||
LEARNREADORIENTATIONMODEL (ch_learnread_in)
|
||||
ch_versions = ch_versions.mix(LEARNREADORIENTATIONMODEL.out.versions)
|
||||
|
||||
//
|
||||
//Generate pileup summary tables using getpileupsummaries. The tumor sample should always be passed in as the first entry of the input and input_index lists of mutect2_input,
|
||||
//to ensure correct file order for calculatecontamination.
|
||||
//
|
||||
pileup_tumor_input = channel.from(input).map {
|
||||
meta, input_file, input_index, which_norm ->
|
||||
[meta, input_file[0], input_index[0]]
|
||||
}
|
||||
|
||||
pileup_normal_input = channel.from(input).map {
|
||||
meta, input_file, input_index, which_norm ->
|
||||
[meta, input_file[1], input_index[1]]
|
||||
}
|
||||
GETPILEUPSUMMARIES_TUMOR ( pileup_tumor_input, germline_resource, germline_resource_tbi, interval_file )
|
||||
GETPILEUPSUMMARIES_NORMAL ( pileup_normal_input, germline_resource, germline_resource_tbi, interval_file )
|
||||
ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions)
|
||||
|
||||
//
|
||||
//Contamination and segmentation tables created using calculatecontamination on the pileup summary table.
|
||||
//
|
||||
ch_pileup_tumor = GETPILEUPSUMMARIES_TUMOR.out.table.collect()
|
||||
ch_pileup_normal = GETPILEUPSUMMARIES_NORMAL.out.table.collect()
|
||||
ch_calccon_in = ch_pileup_tumor.combine(ch_pileup_normal, by: 0)
|
||||
CALCULATECONTAMINATION ( ch_calccon_in, true )
|
||||
ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions)
|
||||
|
||||
//
|
||||
//Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables.
|
||||
//
|
||||
ch_vcf = MUTECT2.out.vcf.collect()
|
||||
ch_tbi = MUTECT2.out.tbi.collect()
|
||||
ch_stats = MUTECT2.out.stats.collect()
|
||||
ch_orientation = LEARNREADORIENTATIONMODEL.out.artifactprior.collect()
|
||||
ch_segment = CALCULATECONTAMINATION.out.segmentation.collect()
|
||||
ch_contamination = CALCULATECONTAMINATION.out.contamination.collect()
|
||||
//[] is used as a placeholder for optional input to specify the contamination estimate as a value, since the contamination table is used, this is not needed.
|
||||
ch_contamination.add([])
|
||||
ch_filtermutect_in = ch_vcf.combine(ch_tbi, by: 0).combine(ch_stats, by: 0).combine(ch_orientation, by: 0).combine(ch_segment, by: 0).combine(ch_contamination, by: 0)
|
||||
FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict )
|
||||
ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions)
|
||||
|
||||
emit:
|
||||
mutect2_vcf = MUTECT2.out.vcf.collect() // channel: [ val(meta), [ vcf ] ]
|
||||
mutect2_tbi = MUTECT2.out.tbi.collect() // channel: [ val(meta), [ tbi ] ]
|
||||
mutect2_stats = MUTECT2.out.stats.collect() // channel: [ val(meta), [ stats ] ]
|
||||
mutect2_f1r2 = MUTECT2.out.f1r2.collect() // channel: [ val(meta), [ f1r2 ] ]
|
||||
|
||||
artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior.collect() // channel: [ val(meta), [ artifactprior ] ]
|
||||
|
||||
pileup_table_tumor = GETPILEUPSUMMARIES_TUMOR.out.table.collect() // channel: [ val(meta), [ table_tumor ] ]
|
||||
pileup_table_normal = GETPILEUPSUMMARIES_NORMAL.out.table.collect() // channel: [ val(meta), [ table_normal ] ]
|
||||
|
||||
contamination_table = CALCULATECONTAMINATION.out.contamination.collect() // channel: [ val(meta), [ contamination ] ]
|
||||
segmentation_table = CALCULATECONTAMINATION.out.segmentation.collect() // channel: [ val(meta), [ segmentation ] ]
|
||||
|
||||
filtered_vcf = FILTERMUTECTCALLS.out.vcf.collect() // channel: [ val(meta), [ vcf ] ]
|
||||
filtered_tbi = FILTERMUTECTCALLS.out.tbi.collect() // channel: [ val(meta), [ tbi ] ]
|
||||
filtered_stats = FILTERMUTECTCALLS.out.stats.collect() // channel: [ val(meta), [ stats ] ]
|
||||
|
||||
versions = ch_versions // channel: [ versions.yml ]
|
||||
}
|
|
@ -0,0 +1,127 @@
|
|||
name: gatk_tumor_normal_somatic_variant_calling
|
||||
description: |
|
||||
Perform variant calling on a paired tumor normal set of samples using mutect2 tumor normal mode.
|
||||
f1r2 output of mutect2 is run through learnreadorientationmodel to get the artifact priors.
|
||||
Run the input bam files through getpileupsummaries and then calculatecontamination to get the contamination and segmentation tables.
|
||||
Filter the mutect2 output vcf using filtermutectcalls, artifact priors and the contamination & segmentation tables for additional filtering.
|
||||
keywords:
|
||||
- gatk4
|
||||
- mutect2
|
||||
- learnreadorientationmodel
|
||||
- getpileupsummaries
|
||||
- calculatecontamination
|
||||
- filtermutectcalls
|
||||
- variant_calling
|
||||
- tumor_normal
|
||||
- filtered_vcf
|
||||
modules:
|
||||
- gatk4/mutect2
|
||||
- gatk4/learnreadorientationmodel
|
||||
- gatk4/getpileupsummaries
|
||||
- gatk4/calculatecontamination
|
||||
- gatk4/filtermutectcalls
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test' ]
|
||||
- input:
|
||||
type: list
|
||||
description: list containing the tumor and normal BAM files, in that order, also able to take CRAM as an input
|
||||
pattern: "[ *.{bam/cram} ]"
|
||||
- input_index:
|
||||
type: list
|
||||
description: list containing the tumor and normal BAM file indexes, in that order, also able to take CRAM index as an input
|
||||
pattern: "[ *.{bam.bai/cram.crai} ]"
|
||||
- which_norm:
|
||||
type: list
|
||||
description: optional list of sample headers contained in the normal sample input file.
|
||||
pattern: "testN"
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
pattern: "*.fasta"
|
||||
- fai:
|
||||
type: file
|
||||
description: Index of reference fasta file
|
||||
pattern: "*.fasta.fai"
|
||||
- dict:
|
||||
type: file
|
||||
description: GATK sequence dictionary
|
||||
pattern: "*.dict"
|
||||
- germline_resource:
|
||||
type: file
|
||||
description: Population vcf of germline sequencing, containing allele fractions.
|
||||
pattern: "*.vcf.gz"
|
||||
- germline_resource_tbi:
|
||||
type: file
|
||||
description: Index file for the germline resource.
|
||||
pattern: "*.vcf.gz.tbi"
|
||||
- panel_of_normals:
|
||||
type: file
|
||||
description: vcf file to be used as a panel of normals.
|
||||
pattern: "*.vcf.gz"
|
||||
- panel_of_normals_tbi:
|
||||
type: file
|
||||
description: Index for the panel of normals.
|
||||
pattern: "*.vcf.gz.tbi"
|
||||
- interval_file:
|
||||
type: file
|
||||
description: File containing intervals.
|
||||
pattern: "*.interval_list"
|
||||
output:
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: 'versions.yml'
|
||||
- mutect2_vcf:
|
||||
type: file
|
||||
description: Compressed vcf file to be used for variant_calling.
|
||||
pattern: "[ *.vcf.gz ]"
|
||||
- mutect2_tbi:
|
||||
type: file
|
||||
description: Indexes of the mutect2_vcf file
|
||||
pattern: "[ *vcf.gz.tbi ]"
|
||||
- mutect2_stats:
|
||||
type: file
|
||||
description: Stats files for the mutect2 vcf
|
||||
pattern: "[ *vcf.gz.stats ]"
|
||||
- mutect2_f1r2:
|
||||
type: file
|
||||
description: file containing information to be passed to LearnReadOrientationModel.
|
||||
pattern: "*.f1r2.tar.gz"
|
||||
- artifact_priors:
|
||||
type: file
|
||||
description: file containing artifact-priors to be used by filtermutectcalls.
|
||||
pattern: "*.tar.gz"
|
||||
- pileup_table_tumor:
|
||||
type: file
|
||||
description: File containing the tumor pileup summary table, kept separate as calculatecontamination needs them individually specified.
|
||||
pattern: "*_tumor.pileups.table"
|
||||
- pileup_table_normal:
|
||||
type: file
|
||||
description: File containing the normal pileup summary table, kept separate as calculatecontamination needs them individually specified.
|
||||
pattern: "*_normal.pileups.table"
|
||||
- contamination_table:
|
||||
type: file
|
||||
description: File containing the contamination table.
|
||||
pattern: "*.contamination.table"
|
||||
- segmentation_table:
|
||||
type: file
|
||||
description: Output table containing segmentation of tumor minor allele fractions.
|
||||
pattern: "*.segmentation.table"
|
||||
- filtered_vcf:
|
||||
type: file
|
||||
description: file containing filtered mutect2 calls.
|
||||
pattern: "*.vcf.gz"
|
||||
- filtered_tbi:
|
||||
type: file
|
||||
description: tbi file that pairs with filtered vcf.
|
||||
pattern: "*.vcf.gz.tbi"
|
||||
- filtered_stats:
|
||||
type: file
|
||||
description: file containing statistics of the filtermutectcalls run.
|
||||
pattern: "*.filteringStats.tsv"
|
||||
authors:
|
||||
- '@GCJMackenzie'
|
|
@ -0,0 +1,6 @@
|
|||
params.mutect2_options = [:]
|
||||
params.learnorientation_options = [:]
|
||||
params.getpileup_tumor_options = [:]
|
||||
params.getpileup_normal_options = [:]
|
||||
params.calccontam_options = [:]
|
||||
params.filtercalls_options = [:]
|
|
@ -522,7 +522,7 @@ gatk4/intervallisttools:
|
|||
- modules/gatk4/intervallisttools/**
|
||||
- tests/modules/gatk4/intervallisttools/**
|
||||
|
||||
gatk4/learnreadorientationmodel:
|
||||
gatk4/learnreadorientationmodel: &gatk4/learnreadorientationmodel
|
||||
- modules/gatk4/learnreadorientationmodel/**
|
||||
- tests/modules/gatk4/learnreadorientationmodel/**
|
||||
|
||||
|
@ -1427,6 +1427,15 @@ subworkflows/gatk_create_som_pon:
|
|||
- *gatk4/genomicsdbimport
|
||||
- *gatk4/createsomaticpanelofnormals
|
||||
|
||||
subworkflows/gatk_tumor_normal_somatic_variant_calling:
|
||||
- subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/**
|
||||
- tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/**
|
||||
- *gatk4/mutect2
|
||||
- *gatk4/learnreadorientationmodel
|
||||
- *gatk4/getpileupsummaries
|
||||
- *gatk4/calculatecontamination
|
||||
- *gatk4/filtermutectcalls
|
||||
|
||||
subworkflows/gatk_tumor_only_somatic_variant_calling:
|
||||
- subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/**
|
||||
- tests/subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/**
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../../../subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main' addParams( [:] )
|
||||
|
||||
workflow test_gatk_tumor_normal_somatic_variant_calling {
|
||||
input = [
|
||||
[ [ id:'test'], // meta map
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
|
||||
["testN"]
|
||||
]
|
||||
]
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||
germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true)
|
||||
germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true)
|
||||
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
|
||||
panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
|
||||
interval_file = file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true)
|
||||
|
||||
GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi, interval_file )
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
- name: gatk_tumor_normal_somatic_variant_calling
|
||||
command: nextflow run ./tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling -entry test_gatk_tumor_normal_somatic_variant_calling -c tests/config/nextflow.config
|
||||
tags:
|
||||
- subworkflows/gatk_tumor_normal_somatic_variant_calling
|
||||
# Modules
|
||||
# - gatk4/mutect2
|
||||
# - gatk4/learnreadorientationmodel
|
||||
# - gatk4/getpileupsummaries
|
||||
# - gatk4/calculatecontamination
|
||||
# - gatk4/filtermutectcalls
|
||||
files:
|
||||
# gatk4 mutect2
|
||||
- path: ./output/mutect2/test.vcf.gz
|
||||
- path: ./output/mutect2/test.vcf.gz.stats
|
||||
md5sum: 6ecb874e6a95aa48233587b876c2a7a9
|
||||
- path: ./output/mutect2/test.vcf.gz.tbi
|
||||
- path: ./output/mutect2/test.f1r2.tar.gz
|
||||
# gatk4 learnreadorientationmodel
|
||||
- path: ./output/learnreadorientationmodel/test.tar.gz
|
||||
# gatk4 getpileupsummaries
|
||||
- path: ./output/getpileupsummaries/test_tumor.pileups.table
|
||||
md5sum: 8b1b4c8ab831eca50ee9e940463a741f
|
||||
- path: ./output/getpileupsummaries/test_normal.pileups.table
|
||||
md5sum: 0d19674bef2ff0700d5b02b3463dd210
|
||||
# gatk4 calculatecontamination
|
||||
- path: ./output/calculatecontamination/test.contamination.table
|
||||
md5sum: 5fdcf1728cf98985ce31c038eb24e05c
|
||||
- path: ./output/calculatecontamination/test.segmentation.table
|
||||
md5sum: 91f28bfe4727a3256810927fc5eba92f
|
||||
# gatk4 filtermutectcalls
|
||||
- path: ./output/filtermutectcalls/test_filtered.vcf.gz
|
||||
- path: ./output/filtermutectcalls/test_filtered.vcf.gz.filteringStats.tsv
|
||||
md5sum: 98e1b87a52999eb8f429ef4a7877eb3f
|
||||
- path: ./output/filtermutectcalls/test_filtered.vcf.gz.tbi
|
Loading…
Reference in a new issue