mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Add gatk somatic tumour calling subworkflow (#1064)
* initial commit to set up new branch * save changes to checkout * workflow working, still needs test.yml and meta.yml, also fix versions file * subworkflow finished * Update pytest_subworkflows.yml * Update pytest_subworkflows.yml * Update pytest_subworkflows.yml * fix config subworkflow name * Update main.nf * Update pytest_subworkflows.yml * fixed md5sum issue likely caused by gatk version update * tumour changed to tumor * old dir deleted * Comments added to explain use of placeholders '[]' * updated index names, input channel renamed to input * Apply suggestions from code review * updated to perform new subworkflow testing Co-authored-by: GCJMackenzie <gavin.mackenzie@nibsc.org> Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>
This commit is contained in:
parent
2d4549122b
commit
071b1d50a8
6 changed files with 264 additions and 4 deletions
|
@ -0,0 +1,88 @@
|
||||||
|
//
|
||||||
|
// Run GATK mutect2 in tumor-only mode, followed by getpileupsummaries, calculatecontamination and filtermutectcalls
|
||||||
|
//
|
||||||
|
|
||||||
|
params.mutect2_options = [:]
|
||||||
|
params.getpileup_options = [:]
|
||||||
|
params.calccontam_options = [:]
|
||||||
|
params.filtercalls_options = [suffix: '_filtered']
|
||||||
|
|
||||||
|
include { GATK4_MUTECT2 as MUTECT2 } from '../../../modules/gatk4/mutect2/main' addParams( options: params.mutect2_options )
|
||||||
|
include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES } from '../../../modules/gatk4/getpileupsummaries/main' addParams( options: params.getpileup_options )
|
||||||
|
include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/gatk4/calculatecontamination/main' addParams( options: params.calccontam_options )
|
||||||
|
include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/gatk4/filtermutectcalls/main' addParams( options: params.filtercalls_options )
|
||||||
|
|
||||||
|
workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
|
||||||
|
take:
|
||||||
|
input // channel: [ val(meta), [ input ], [ input_index ], [] ]
|
||||||
|
fasta // channel: /path/to/reference/fasta
|
||||||
|
fai // channel: /path/to/reference/fasta/index
|
||||||
|
dict // channel: /path/to/reference/fasta/dictionary
|
||||||
|
germline_resource // channel: /path/to/germline/resource
|
||||||
|
germline_resource_tbi // channel: /path/to/germline/index
|
||||||
|
panel_of_normals // channel: /path/to/panel/of/normals
|
||||||
|
panel_of_normals_tbi // channel: /path/to/panel/of/normals/index
|
||||||
|
interval_file // channel: /path/to/interval/file
|
||||||
|
|
||||||
|
|
||||||
|
main:
|
||||||
|
ch_versions = Channel.empty()
|
||||||
|
mutect2_input = channel.from(input)
|
||||||
|
|
||||||
|
//
|
||||||
|
//Perform variant calling using mutect2 module in tumor single mode.
|
||||||
|
//
|
||||||
|
MUTECT2 ( mutect2_input , true , false , false , [] , fasta , fai , dict , germline_resource , germline_resource_tbi , panel_of_normals , panel_of_normals_tbi )
|
||||||
|
ch_versions = ch_versions.mix(MUTECT2.out.versions)
|
||||||
|
|
||||||
|
//
|
||||||
|
//Generate pileup summary table using getpileupsummaries.
|
||||||
|
//
|
||||||
|
pileup_input = channel.from(input).map {
|
||||||
|
meta, input_file, input_index, which_norm ->
|
||||||
|
[meta, input_file[0], input_index[0]]
|
||||||
|
}
|
||||||
|
GETPILEUPSUMMARIES ( pileup_input , germline_resource , germline_resource_tbi , interval_file )
|
||||||
|
ch_versions = ch_versions.mix(GETPILEUPSUMMARIES.out.versions)
|
||||||
|
|
||||||
|
//
|
||||||
|
//Contamination and segmentation tables created using calculatecontamination on the pileup summary table.
|
||||||
|
//
|
||||||
|
ch_pileup = GETPILEUPSUMMARIES.out.table.collect()
|
||||||
|
//[] is a placeholder for the optional input where the matched normal sample would be passed in for tumor-normal samples, which is not necessary for this workflow.
|
||||||
|
ch_pileup.add([])
|
||||||
|
CALCULATECONTAMINATION ( ch_pileup, true )
|
||||||
|
ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions)
|
||||||
|
|
||||||
|
//
|
||||||
|
//Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables.
|
||||||
|
//
|
||||||
|
ch_vcf = MUTECT2.out.vcf.collect()
|
||||||
|
ch_tbi = MUTECT2.out.tbi.collect()
|
||||||
|
ch_stats = MUTECT2.out.stats.collect()
|
||||||
|
//[] is added as a placeholder for the optional artifact priors input file, which is only used for tumor-normal samples and therefore isn't needed in this workflow.
|
||||||
|
ch_stats.add([])
|
||||||
|
ch_segment = CALCULATECONTAMINATION.out.segmentation.collect()
|
||||||
|
ch_contamination = CALCULATECONTAMINATION.out.contamination.collect()
|
||||||
|
//[] is added as a placeholder for entering a contamination estimate value, which is not needed as this workflow uses the contamination table instead.
|
||||||
|
ch_contamination.add([])
|
||||||
|
ch_filtermutect_in = ch_vcf.combine(ch_tbi, by: 0).combine(ch_stats, by: 0).combine(ch_segment, by: 0).combine(ch_contamination, by: 0)
|
||||||
|
FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict )
|
||||||
|
ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions)
|
||||||
|
|
||||||
|
emit:
|
||||||
|
mutect2_vcf = MUTECT2.out.vcf.collect() // channel: [ val(meta), [ vcf ] ]
|
||||||
|
mutect2_index = MUTECT2.out.tbi.collect() // channel: [ val(meta), [ tbi ] ]
|
||||||
|
mutect2_stats = MUTECT2.out.stats.collect() // channel: [ val(meta), [ stats ] ]
|
||||||
|
|
||||||
|
pileup_table = GETPILEUPSUMMARIES.out.table.collect() // channel: [ val(meta), [ table ] ]
|
||||||
|
|
||||||
|
contamination_table = CALCULATECONTAMINATION.out.contamination.collect() // channel: [ val(meta), [ contamination ] ]
|
||||||
|
segmentation_table = CALCULATECONTAMINATION.out.segmentation.collect() // channel: [ val(meta), [ segmentation ] ]
|
||||||
|
|
||||||
|
filtered_vcf = FILTERMUTECTCALLS.out.vcf.collect() // channel: [ val(meta), [ vcf ] ]
|
||||||
|
filtered_index = FILTERMUTECTCALLS.out.tbi.collect() // channel: [ val(meta), [ tbi ] ]
|
||||||
|
filtered_stats = FILTERMUTECTCALLS.out.stats.collect() // channel: [ val(meta), [ stats ] ]
|
||||||
|
|
||||||
|
versions = ch_versions // channel: [ versions.yml ]
|
||||||
|
}
|
|
@ -0,0 +1,108 @@
|
||||||
|
name: gatk_tumor_only_somatic_variant_calling
|
||||||
|
description: |
|
||||||
|
Perform variant calling on a single tumor sample using mutect2 tumor only mode.
|
||||||
|
Run the input bam file through getpileupsummaries and then calculatecontamination to get the contamination and segmentation tables.
|
||||||
|
Filter the mutect2 output vcf using filtermutectcalls and the contamination & segmentation tables for additional filtering.
|
||||||
|
keywords:
|
||||||
|
- gatk4
|
||||||
|
- mutect2
|
||||||
|
- getpileupsummaries
|
||||||
|
- calculatecontamination
|
||||||
|
- filtermutectcalls
|
||||||
|
- variant_calling
|
||||||
|
- tumor_only
|
||||||
|
- filtered_vcf
|
||||||
|
modules:
|
||||||
|
- gatk4/mutect2
|
||||||
|
- gatk4/getpileupsummaries
|
||||||
|
- gatk4/calculatecontamination
|
||||||
|
- gatk4/filtermutectcalls
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test' ]
|
||||||
|
- input:
|
||||||
|
type: list
|
||||||
|
description: list containing one BAM file, also able to take CRAM as an input
|
||||||
|
pattern: "[ *.{bam/cram} ]"
|
||||||
|
- input_index:
|
||||||
|
type: list
|
||||||
|
description: list containing one BAM file index, also able to take CRAM index as an input
|
||||||
|
pattern: "[ *.{bam.bai/cram.crai} ]"
|
||||||
|
- fasta:
|
||||||
|
type: file
|
||||||
|
description: The reference fasta file
|
||||||
|
pattern: "*.fasta"
|
||||||
|
- fai:
|
||||||
|
type: file
|
||||||
|
description: Index of reference fasta file
|
||||||
|
pattern: "*.fasta.fai"
|
||||||
|
- dict:
|
||||||
|
type: file
|
||||||
|
description: GATK sequence dictionary
|
||||||
|
pattern: "*.dict"
|
||||||
|
- germline_resource:
|
||||||
|
type: file
|
||||||
|
description: Population vcf of germline sequencing, containing allele fractions.
|
||||||
|
pattern: "*.vcf.gz"
|
||||||
|
- germline_resource_tbi:
|
||||||
|
type: file
|
||||||
|
description: Index file for the germline resource.
|
||||||
|
pattern: "*.vcf.gz.tbi"
|
||||||
|
- panel_of_normals:
|
||||||
|
type: file
|
||||||
|
description: vcf file to be used as a panel of normals.
|
||||||
|
pattern: "*.vcf.gz"
|
||||||
|
- panel_of_normals_tbi:
|
||||||
|
type: file
|
||||||
|
description: Index for the panel of normals.
|
||||||
|
pattern: "*.vcf.gz.tbi"
|
||||||
|
- interval_file:
|
||||||
|
type: file
|
||||||
|
description: File containing intervals.
|
||||||
|
pattern: "*.interval_list"
|
||||||
|
output:
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: 'versions.yml'
|
||||||
|
- mutect2_vcf:
|
||||||
|
type: file
|
||||||
|
description: Compressed vcf file to be used for variant_calling.
|
||||||
|
pattern: "[ *.vcf.gz ]"
|
||||||
|
- mutect2_index:
|
||||||
|
type: file
|
||||||
|
description: Indexes of the mutect2_vcf file
|
||||||
|
pattern: "[ *vcf.gz.tbi ]"
|
||||||
|
- mutect2_stats:
|
||||||
|
type: file
|
||||||
|
description: Stats files for the mutect2 vcf
|
||||||
|
pattern: "[ *vcf.gz.stats ]"
|
||||||
|
- pileup_table:
|
||||||
|
type: file
|
||||||
|
description: File containing the pileup summary table.
|
||||||
|
pattern: "*.pileups.table"
|
||||||
|
- contamination_table:
|
||||||
|
type: file
|
||||||
|
description: File containing the contamination table.
|
||||||
|
pattern: "*.contamination.table"
|
||||||
|
- segmentation_table:
|
||||||
|
type: file
|
||||||
|
description: Output table containing segmentation of tumor minor allele fractions.
|
||||||
|
pattern: "*.segmentation.table"
|
||||||
|
- filtered_vcf:
|
||||||
|
type: file
|
||||||
|
description: file containing filtered mutect2 calls.
|
||||||
|
pattern: "*.vcf.gz"
|
||||||
|
- filtered_index:
|
||||||
|
type: file
|
||||||
|
description: tbi file that pairs with filtered vcf.
|
||||||
|
pattern: "*.vcf.gz.tbi"
|
||||||
|
- filtered_stats:
|
||||||
|
type: file
|
||||||
|
description: file containing statistics of the filtermutectcalls run.
|
||||||
|
pattern: "*.filteringStats.tsv"
|
||||||
|
authors:
|
||||||
|
- '@GCJMackenzie'
|
|
@ -0,0 +1,4 @@
|
||||||
|
params.mutect2_options = [:]
|
||||||
|
params.getpileup_options = [:]
|
||||||
|
params.calccontam_options = [:]
|
||||||
|
params.filtercalls_options = [:]
|
|
@ -474,7 +474,7 @@ gatk4/bedtointervallist:
|
||||||
- modules/gatk4/bedtointervallist/**
|
- modules/gatk4/bedtointervallist/**
|
||||||
- tests/modules/gatk4/bedtointervallist/**
|
- tests/modules/gatk4/bedtointervallist/**
|
||||||
|
|
||||||
gatk4/calculatecontamination:
|
gatk4/calculatecontamination: &gatk4/calculatecontamination
|
||||||
- modules/gatk4/calculatecontamination/**
|
- modules/gatk4/calculatecontamination/**
|
||||||
- tests/modules/gatk4/calculatecontamination/**
|
- tests/modules/gatk4/calculatecontamination/**
|
||||||
|
|
||||||
|
@ -494,7 +494,7 @@ gatk4/fastqtosam:
|
||||||
- modules/gatk4/fastqtosam/**
|
- modules/gatk4/fastqtosam/**
|
||||||
- tests/modules/gatk4/fastqtosam/**
|
- tests/modules/gatk4/fastqtosam/**
|
||||||
|
|
||||||
gatk4/filtermutectcalls:
|
gatk4/filtermutectcalls: &gatk4/filtermutectcalls
|
||||||
- modules/gatk4/filtermutectcalls/**
|
- modules/gatk4/filtermutectcalls/**
|
||||||
- tests/modules/gatk4/filtermutectcalls/**
|
- tests/modules/gatk4/filtermutectcalls/**
|
||||||
|
|
||||||
|
@ -506,7 +506,7 @@ gatk4/genotypegvcfs:
|
||||||
- modules/gatk4/genotypegvcfs/**
|
- modules/gatk4/genotypegvcfs/**
|
||||||
- tests/modules/gatk4/genotypegvcfs/**
|
- tests/modules/gatk4/genotypegvcfs/**
|
||||||
|
|
||||||
gatk4/getpileupsummaries:
|
gatk4/getpileupsummaries: &gatk4/getpileupsummaries
|
||||||
- modules/gatk4/getpileupsummaries/**
|
- modules/gatk4/getpileupsummaries/**
|
||||||
- tests/modules/gatk4/getpileupsummaries/**
|
- tests/modules/gatk4/getpileupsummaries/**
|
||||||
|
|
||||||
|
@ -538,7 +538,7 @@ gatk4/mergevcfs:
|
||||||
- modules/gatk4/mergevcfs/**
|
- modules/gatk4/mergevcfs/**
|
||||||
- tests/modules/gatk4/mergevcfs/**
|
- tests/modules/gatk4/mergevcfs/**
|
||||||
|
|
||||||
gatk4/mutect2:
|
gatk4/mutect2: &gatk4/mutect2
|
||||||
- modules/gatk4/mutect2/**
|
- modules/gatk4/mutect2/**
|
||||||
- tests/modules/gatk4/mutect2/**
|
- tests/modules/gatk4/mutect2/**
|
||||||
|
|
||||||
|
@ -1426,3 +1426,11 @@ subworkflows/gatk_create_som_pon:
|
||||||
- tests/subworkflows/nf-core/gatk_create_som_pon/**
|
- tests/subworkflows/nf-core/gatk_create_som_pon/**
|
||||||
- *gatk4/genomicsdbimport
|
- *gatk4/genomicsdbimport
|
||||||
- *gatk4/createsomaticpanelofnormals
|
- *gatk4/createsomaticpanelofnormals
|
||||||
|
|
||||||
|
subworkflows/gatk_tumor_only_somatic_variant_calling:
|
||||||
|
- subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/**
|
||||||
|
- tests/subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/**
|
||||||
|
- *gatk4/mutect2
|
||||||
|
- *gatk4/getpileupsummaries
|
||||||
|
- *gatk4/calculatecontamination
|
||||||
|
- *gatk4/filtermutectcalls
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../../../subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/main' addParams( [:] )
|
||||||
|
|
||||||
|
workflow test_gatk_tumor_only_somatic_variant_calling {
|
||||||
|
input = [
|
||||||
|
[[ id:'test' ], // meta map
|
||||||
|
[file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
|
||||||
|
[file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
|
||||||
|
[] ]
|
||||||
|
]
|
||||||
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
|
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
|
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||||
|
germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true)
|
||||||
|
germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true)
|
||||||
|
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
|
||||||
|
panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
|
||||||
|
interval_file = file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true)
|
||||||
|
|
||||||
|
GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi, interval_file )
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
- name: gatk_tumor_only_somatic_variant_calling
|
||||||
|
command: nextflow run ./tests/subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling -entry test_gatk_tumor_only_somatic_variant_calling -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- subworkflows/gatk_tumor_only_somatic_variant_calling
|
||||||
|
# Modules
|
||||||
|
# - gatk4/mutect2
|
||||||
|
# - gatk4/getpileupsummaries
|
||||||
|
# - gatk4/calculatecontamination
|
||||||
|
# - gatk4/filtermutectcalls
|
||||||
|
files:
|
||||||
|
# gatk4 mutect2
|
||||||
|
- path: ./output/mutect2/test.vcf.gz
|
||||||
|
- path: ./output/mutect2/test.vcf.gz.stats
|
||||||
|
md5sum: 106c5828b02b906c97922618b6072169
|
||||||
|
- path: ./output/mutect2/test.vcf.gz.tbi
|
||||||
|
# gatk4 getpileupsummaries
|
||||||
|
- path: ./output/getpileupsummaries/test.pileups.table
|
||||||
|
md5sum: 8b1b4c8ab831eca50ee9e940463a741f
|
||||||
|
# gatk4 calculatecontamination
|
||||||
|
- path: ./output/calculatecontamination/test.contamination.table
|
||||||
|
md5sum: 5fdcf1728cf98985ce31c038eb24e05c
|
||||||
|
- path: ./output/calculatecontamination/test.segmentation.table
|
||||||
|
md5sum: 91f28bfe4727a3256810927fc5eba92f
|
||||||
|
# gatk4 filtermutectcalls
|
||||||
|
- path: ./output/filtermutectcalls/test_filtered.vcf.gz
|
||||||
|
- path: ./output/filtermutectcalls/test_filtered.vcf.gz.filteringStats.tsv
|
||||||
|
md5sum: 8731945490960546719ce4a71a151e4f
|
||||||
|
- path: ./output/filtermutectcalls/test_filtered.vcf.gz.tbi
|
Loading…
Reference in a new issue