diff --git a/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf new file mode 100644 index 00000000..25c63687 --- /dev/null +++ b/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf @@ -0,0 +1,109 @@ +// +// Run GATK mutect2 in tumor normal mode, getpileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls +// + +params.mutect2_options = [:] +params.learnorientation_options = [:] +params.getpileup_tumor_options = [suffix: '_tumor'] +params.getpileup_normal_options = [suffix: '_normal'] +params.calccontam_options = [:] +params.filtercalls_options = [suffix: '_filtered'] + +include { GATK4_MUTECT2 as MUTECT2 } from '../../../modules/gatk4/mutect2/main' addParams( options: params.mutect2_options ) +include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/gatk4/learnreadorientationmodel/main' addParams( options: params.learnorientation_options ) +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/gatk4/getpileupsummaries/main' addParams( options: params.getpileup_tumor_options ) +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL} from '../../../modules/gatk4/getpileupsummaries/main' addParams( options: params.getpileup_normal_options ) +include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/gatk4/calculatecontamination/main' addParams( options: params.calccontam_options ) +include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/gatk4/filtermutectcalls/main' addParams( options: params.filtercalls_options ) + +workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { + take: + input // channel: [ val(meta), [ input ], [ input_index ], [which_norm] ] + fasta // channel: /path/to/reference/fasta + fai // channel: /path/to/reference/fasta/index + dict // channel: 
/path/to/reference/fasta/dictionary + germline_resource // channel: /path/to/germline/resource + germline_resource_tbi // channel: /path/to/germline/index + panel_of_normals // channel: /path/to/panel/of/normals + panel_of_normals_tbi // channel: /path/to/panel/of/normals/index + interval_file // channel: /path/to/interval/file + + + main: + ch_versions = Channel.empty() + + // + //Perform variant calling using mutect2 module in tumor normal mode. + // + mutect2_input = channel.from(input) + MUTECT2 ( mutect2_input, false, false, false, [], fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) + ch_versions = ch_versions.mix(MUTECT2.out.versions) + + // + //Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2. + // + ch_learnread_in = MUTECT2.out.f1r2.collect() + LEARNREADORIENTATIONMODEL (ch_learnread_in) + ch_versions = ch_versions.mix(LEARNREADORIENTATIONMODEL.out.versions) + + // + //Generate pileup summary tables using getpileupsummaries. tumor sample should always be passed in as the first entry of the input and input index lists, + //to ensure correct file order for calculatecontamination. + // + pileup_tumor_input = channel.from(input).map { + meta, input_file, input_index, which_norm -> + [meta, input_file[0], input_index[0]] + } + + pileup_normal_input = channel.from(input).map { + meta, input_file, input_index, which_norm -> + [meta, input_file[1], input_index[1]] + } + GETPILEUPSUMMARIES_TUMOR ( pileup_tumor_input, germline_resource, germline_resource_tbi, interval_file ) + GETPILEUPSUMMARIES_NORMAL ( pileup_normal_input, germline_resource, germline_resource_tbi, interval_file ) + ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_TUMOR.out.versions, GETPILEUPSUMMARIES_NORMAL.out.versions) + + // + //Contamination and segmentation tables created using calculatecontamination on the pileup summary table. 
+ // + ch_pileup_tumor = GETPILEUPSUMMARIES_TUMOR.out.table.collect() + ch_pileup_normal = GETPILEUPSUMMARIES_NORMAL.out.table.collect() + ch_calccon_in = ch_pileup_tumor.combine(ch_pileup_normal, by: 0) + CALCULATECONTAMINATION ( ch_calccon_in, true ) + ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) + + // + //Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables. + // + ch_vcf = MUTECT2.out.vcf.collect() + ch_tbi = MUTECT2.out.tbi.collect() + ch_stats = MUTECT2.out.stats.collect() + ch_orientation = LEARNREADORIENTATIONMODEL.out.artifactprior.collect() + ch_segment = CALCULATECONTAMINATION.out.segmentation.collect() + ch_contamination = CALCULATECONTAMINATION.out.contamination.collect() + //[] is used as a placeholder for optional input to specify the contamination estimate as a value, since the contamination table is used, this is not needed. + ch_contamination.add([]) + ch_filtermutect_in = ch_vcf.combine(ch_tbi, by: 0).combine(ch_stats, by: 0).combine(ch_orientation, by: 0).combine(ch_segment, by: 0).combine(ch_contamination, by: 0) + FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) + ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) + + emit: + mutect2_vcf = MUTECT2.out.vcf.collect() // channel: [ val(meta), [ vcf ] ] + mutect2_tbi = MUTECT2.out.tbi.collect() // channel: [ val(meta), [ tbi ] ] + mutect2_stats = MUTECT2.out.stats.collect() // channel: [ val(meta), [ stats ] ] + mutect2_f1r2 = MUTECT2.out.f1r2.collect() // channel: [ val(meta), [ f1r2 ] ] + + artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior.collect() // channel: [ val(meta), [ artifactprior ] ] + + pileup_table_tumor = GETPILEUPSUMMARIES_TUMOR.out.table.collect() // channel: [ val(meta), [ table_tumor ] ] + pileup_table_normal = GETPILEUPSUMMARIES_NORMAL.out.table.collect() // channel: [ val(meta), [ table_normal ] ] + + contamination_table = 
CALCULATECONTAMINATION.out.contamination.collect() // channel: [ val(meta), [ contamination ] ] + segmentation_table = CALCULATECONTAMINATION.out.segmentation.collect() // channel: [ val(meta), [ segmentation ] ] + + filtered_vcf = FILTERMUTECTCALLS.out.vcf.collect() // channel: [ val(meta), [ vcf ] ] + filtered_tbi = FILTERMUTECTCALLS.out.tbi.collect() // channel: [ val(meta), [ tbi ] ] + filtered_stats = FILTERMUTECTCALLS.out.stats.collect() // channel: [ val(meta), [ stats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/meta.yml b/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/meta.yml new file mode 100644 index 00000000..4c42addf --- /dev/null +++ b/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/meta.yml @@ -0,0 +1,127 @@ +name: gatk_tumor_normal_somatic_variant_calling +description: | + Perform variant calling on a paired tumor normal set of samples using mutect2 tumor normal mode. + f1r2 output of mutect2 is run through learnreadorientationmodel to get the artifact priors. + Run the input bam files through getpileupsummaries and then calculatecontamination to get the contamination and segmentation tables. + Filter the mutect2 output vcf using filtermutectcalls, artifact priors and the contamination & segmentation tables for additional filtering. +keywords: + - gatk4 + - mutect2 + - learnreadorientationmodel + - getpileupsummaries + - calculatecontamination + - filtermutectcalls + - variant_calling + - tumor_normal + - filtered_vcf +modules: + - gatk4/mutect2 + - gatk4/learnreadorientationmodel + - gatk4/getpileupsummaries + - gatk4/calculatecontamination + - gatk4/filtermutectcalls +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - input: + type: list + description: list containing the tumor and normal BAM files, in that order, also able to take CRAM as an input + pattern: "[ *.{bam/cram} ]" + - input_index: + type: list + description: list containing the tumor and normal BAM file indexes, in that order, also able to take CRAM index as an input + pattern: "[ *.{bam.bai/cram.crai} ]" + - which_norm: + type: list + description: optional list of sample headers contained in the normal sample input file. + pattern: "testN" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. + pattern: "*.vcf.gz.tbi" + - interval_file: + type: file + description: File containing intervals. + pattern: "*.interval_list" +output: + - versions: + type: file + description: File containing software versions + pattern: 'versions.yml' + - mutect2_vcf: + type: file + description: Compressed vcf file to be used for variant_calling. + pattern: "[ *.vcf.gz ]" + - mutect2_tbi: + type: file + description: Indexes of the mutect2_vcf file + pattern: "[ *vcf.gz.tbi ]" + - mutect2_stats: + type: file + description: Stats files for the mutect2 vcf + pattern: "[ *vcf.gz.stats ]" + - mutect2_f1r2: + type: file + description: file containing information to be passed to LearnReadOrientationModel. 
+ pattern: "*.f1r2.tar.gz" + - artifact_priors: + type: file + description: file containing artifact-priors to be used by filtermutectcalls. + pattern: "*.tar.gz" + - pileup_table_tumor: + type: file + description: File containing the tumor pileup summary table, kept separate as calculatecontamination needs them individually specified. + pattern: "*_tumor.pileups.table" + - pileup_table_normal: + type: file + description: File containing the normal pileup summary table, kept separate as calculatecontamination needs them individually specified. + pattern: "*_normal.pileups.table" + - contamination_table: + type: file + description: File containing the contamination table. + pattern: "*.contamination.table" + - segmentation_table: + type: file + description: Output table containing segmentation of tumor minor allele fractions. + pattern: "*.segmentation.table" + - filtered_vcf: + type: file + description: file containing filtered mutect2 calls. + pattern: "*.vcf.gz" + - filtered_tbi: + type: file + description: tbi file that pairs with filtered vcf. + pattern: "*.vcf.gz.tbi" + - filtered_stats: + type: file + description: file containing statistics of the filtermutectcalls run. 
+ pattern: "*.filteringStats.tsv" +authors: + - '@GCJMackenzie' diff --git a/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/nextflow.config b/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/nextflow.config new file mode 100644 index 00000000..bb8d1bc4 --- /dev/null +++ b/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/nextflow.config @@ -0,0 +1,6 @@ +params.mutect2_options = [:] +params.learnorientation_options = [:] +params.getpileup_tumor_options = [suffix: '_tumor'] +params.getpileup_normal_options = [suffix: '_normal'] +params.calccontam_options = [:] +params.filtercalls_options = [suffix: '_filtered'] diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 29d07639..9ed9f55c 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -522,7 +522,7 @@ gatk4/intervallisttools: - modules/gatk4/intervallisttools/** - tests/modules/gatk4/intervallisttools/** -gatk4/learnreadorientationmodel: +gatk4/learnreadorientationmodel: &gatk4/learnreadorientationmodel - modules/gatk4/learnreadorientationmodel/** - tests/modules/gatk4/learnreadorientationmodel/** @@ -1427,6 +1427,15 @@ subworkflows/gatk_create_som_pon: - *gatk4/genomicsdbimport - *gatk4/createsomaticpanelofnormals +subworkflows/gatk_tumor_normal_somatic_variant_calling: + - subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/** + - tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/** + - *gatk4/mutect2 + - *gatk4/learnreadorientationmodel + - *gatk4/getpileupsummaries + - *gatk4/calculatecontamination + - *gatk4/filtermutectcalls + subworkflows/gatk_tumor_only_somatic_variant_calling: - subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/** - tests/subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/** diff --git a/tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf b/tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf new file mode 100644 index 
00000000..21e35998 --- /dev/null +++ b/tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf @@ -0,0 +1,25 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../../../subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main' addParams( [:] ) + +workflow test_gatk_tumor_normal_somatic_variant_calling { + input = [ // each entry: [ meta, [ tumor, normal ] BAMs, [ tumor, normal ] BAIs, which_norm ] + [ [ id:'test'], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], + ["testN"] // which_norm + ] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true) + germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true) + panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) + panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) + interval_file = file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) + + GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, 
panel_of_normals_tbi, interval_file ) +} diff --git a/tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/test.yml b/tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/test.yml new file mode 100644 index 00000000..3c6753fb --- /dev/null +++ b/tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/test.yml @@ -0,0 +1,34 @@ +- name: gatk_tumor_normal_somatic_variant_calling + command: nextflow run ./tests/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling -entry test_gatk_tumor_normal_somatic_variant_calling -c tests/config/nextflow.config + tags: + - subworkflows/gatk_tumor_normal_somatic_variant_calling + # Modules + # - gatk4/mutect2 + # - gatk4/learnreadorientationmodel + # - gatk4/getpileupsummaries + # - gatk4/calculatecontamination + # - gatk4/filtermutectcalls + files: + # gatk4 mutect2 + - path: ./output/mutect2/test.vcf.gz + - path: ./output/mutect2/test.vcf.gz.stats + md5sum: 6ecb874e6a95aa48233587b876c2a7a9 + - path: ./output/mutect2/test.vcf.gz.tbi + - path: ./output/mutect2/test.f1r2.tar.gz + # gatk4 learnreadorientationmodel + - path: ./output/learnreadorientationmodel/test.tar.gz + # gatk4 getpileupsummaries + - path: ./output/getpileupsummaries/test_tumor.pileups.table + md5sum: 8b1b4c8ab831eca50ee9e940463a741f + - path: ./output/getpileupsummaries/test_normal.pileups.table + md5sum: 0d19674bef2ff0700d5b02b3463dd210 + # gatk4 calculatecontamination + - path: ./output/calculatecontamination/test.contamination.table + md5sum: 5fdcf1728cf98985ce31c038eb24e05c + - path: ./output/calculatecontamination/test.segmentation.table + md5sum: 91f28bfe4727a3256810927fc5eba92f + # gatk4 filtermutectcalls + - path: ./output/filtermutectcalls/test_filtered.vcf.gz + - path: ./output/filtermutectcalls/test_filtered.vcf.gz.filteringStats.tsv + md5sum: 98e1b87a52999eb8f429ef4a7877eb3f + - path: ./output/filtermutectcalls/test_filtered.vcf.gz.tbi